1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/utils/mount_lustre.c
38 * Author: Robert Read <rread@clusterfs.com>
39 * Author: Nathan Rutman <nathan@clusterfs.com>
51 #include <sys/mount.h>
55 #include <lustre_ver.h>
59 #include "mount_utils.h"
// sysfs attribute names. The two queue/ names are appended to a device
// directory under /sys/block by set_blockdev_tunables(); the md/ name is
// presumably used the same way for md raid devices (TODO confirm, the use
// site is not fully visible).
61 #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
62 #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
63 #define STRIPE_CACHE_SIZE "md/stripe_cache_size"
// Limit that parse_options() compares the retry=<num> option value against.
64 #define MAX_RETRIES 99
// Stripe cache size that set_blockdev_tunables() writes for /dev/md devices
// whose current value is smaller; replaced by the md_stripe_cache_size=<num>
// mount option when that option is given.
71 int md_stripe_cache_size = 16384;
// Program name used as the prefix of diagnostics; main() sets it to the
// last path component of argv[0].
72 char *progname = NULL;
76 fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname);
77 fprintf(out, "\nThis mount helper should only be invoked via the "
78 "mount (8) command,\ne.g. mount -t lustre dev dir\n\n");
79 fprintf(out, "usage: %s [-fhnv] [-o <mntopt>] <device> <mountpt>\n",
82 "\t<device>: the disk device, or for a client:\n"
83 "\t\t<mgmtnid>[:<altmgtnid>...]:/<filesystem>-client\n"
84 "\t<filesystem>: name of the Lustre filesystem (e.g. lustre1)\n"
85 "\t<mountpt>: filesystem mountpoint (e.g. /mnt/lustre)\n"
86 "\t-f|--fake: fake mount (updates /etc/mtab)\n"
87 "\t-o force|--force: force mount even if already in /etc/mtab\n"
88 "\t-h|--help: print this usage message\n"
89 "\t-n|--nomtab: do not update /etc/mtab after mount\n"
90 "\t-v|--verbose: print verbose config settings\n"
91 "\t<mntopt>: one or more comma separated of:\n"
92 "\t\t(no)flock,(no)user_xattr,(no)acl\n"
93 "\t\tabort_recov: abort server recovery handling\n"
94 "\t\tnosvc: only start MGC/MGS obds\n"
95 "\t\tnomgs: only start target obds, using existing MGS\n"
96 "\t\texclude=<ostname>[:<ostname>] : colon-separated list of "
97 "inactive OSTs (e.g. lustre-OST0001)\n"
98 "\t\tretry=<num>: number of times mount is retried by client\n"
99 "\t\tmd_stripe_cache_size=<num>: set the raid stripe cache "
100 "size for the underlying raid if present\n"
102 exit((out != stdout) ? EINVAL : 0);
// Scan the mount table (MOUNTED) for an entry describing this mount.
//
// An entry matches when its device name equals spec1 or spec2 and its
// mount point and filesystem type equal mtpt and type. Every comparison is
// an exact strcmp() match, so differently spelled but equivalent paths
// (for example with a trailing slash) do not match.
//
// The value returned for the match and no-match cases is not visible in
// this view; main() treats a non-zero result as already mounted.
105 static int check_mtab_entry(char *spec1, char *spec2, char *mtpt, char *type)
110 fp = setmntent(MOUNTED, "r");
114 while ((mnt = getmntent(fp)) != NULL) {
115 if ((strcmp(mnt->mnt_fsname, spec1) == 0 ||
116 strcmp(mnt->mnt_fsname, spec2) == 0) &&
117 strcmp(mnt->mnt_dir, mtpt) == 0 &&
118 strcmp(mnt->mnt_type, type) == 0) {
// Append a record for a new mount to the mount table (MOUNTED).
//
// spec  - device name stored in mnt_fsname
// opts  - option string stored in mnt_opts; NULL is recorded as an empty
//         string
// pass  - stored in mnt_passno
// mtpt, type, flags, freq - their use is not visible in this view;
//         presumably the remaining mntent fields (TODO confirm)
//
// The table is opened in append mode. Failure of setmntent() or
// addmntent() is reported on stderr with a progname prefix. addmntent()
// returns 1 on failure and 0 on success, hence the == 1 test.
//
// NOTE(review): both error messages end in a colon without a newline.
129 update_mtab_entry(char *spec, char *mtpt, char *type, char *opts,
130 int flags, int freq, int pass)
136 mnt.mnt_fsname = spec;
139 mnt.mnt_opts = opts ? opts : "";
141 mnt.mnt_passno = pass;
143 fp = setmntent(MOUNTED, "a+");
145 fprintf(stderr, "%s: setmntent(%s): %s:",
146 progname, MOUNTED, strerror (errno));
149 if ((addmntent(fp, &mnt)) == 1) {
150 fprintf(stderr, "%s: addmntent: %s:",
151 progname, strerror (errno));
160 /* Get rid of symbolic hostnames for tcp, since kernel can't do lookups */
161 #define MAXNIDSTR 1024
/*
 * Rewrite the NID list at the front of a device specification.
 *
 * s1 is consumed piece by piece until a '/' is reached. Each piece ends at
 * the next ',' or ':' (found with strpbrk); the separator is overwritten
 * temporarily so the piece can be parsed with libcfs_str2nid() and is put
 * back straight afterwards, so s1 is unchanged on return from that step.
 * Every parsed NID is written to the output with libcfs_nid2str(), followed
 * by the original separator. Whatever remains of s1 once the loop stops is
 * copied to the output unchanged.
 *
 * The output buffer is malloc()ed with a fixed size of MAXNIDSTR bytes and
 * the remaining space is tracked in left. A piece that libcfs_str2nid()
 * maps to LNET_NID_ANY is treated as unparsable and reported on stderr.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the new buffer is returned on success and the caller owns it
 * (TODO confirm). snprintf() returns the length that would have been
 * written, so on truncation c can move past the end of the buffer before
 * left is recomputed.
 */
162 static char *convert_hostnames(char *s1)
164 char *converted, *s2 = 0, *c;
166 int left = MAXNIDSTR;
169 converted = malloc(left);
170 if (converted == NULL) {
171 fprintf(stderr, "out of memory: needed %d bytes\n",
176 while ((left > 0) && (*s1 != '/')) {
177 s2 = strpbrk(s1, ",:");
182 nid = libcfs_str2nid(s1);
183 *s2 = sep; /* back to original string */
184 if (nid == LNET_NID_ANY)
186 c += snprintf(c, left, "%s%c", libcfs_nid2str(nid), sep);
187 left = converted + MAXNIDSTR - c;
190 snprintf(c, left, "%s", s1);
193 fprintf(stderr, "%s: Can't parse NID '%s'\n", progname, s1);
198 /*****************************************************************************
200 * This part was cribbed from util-linux/mount/mount.c. There was no clear
201 * license information, but many other files in the package are identified as
202 * GNU GPL, so it's a pretty safe bet that was their intent.
204 ****************************************************************************/
206 const char *opt; /* option name */
207 int inv; /* true if flag value should be inverted */
208 int mask; /* flag mask value */
/*
 * Table of generic mount options handled by this helper. Each row gives the
 * option name, whether the flag sense is inverted, and the MS_* bit the
 * option controls. parse_one_option() walks the table until it reaches an
 * entry whose name is NULL. Rows with a zero mask change no flag bits.
 */
211 static const struct opt_map opt_map[] = {
212 /*"optname", inv,ms_mask */
213 /* These flags are parsed by mount, not lustre */
214 { "defaults", 0, 0 }, /* default options */
215 { "remount", 0, MS_REMOUNT}, /* remount with different options */
216 { "rw", 1, MS_RDONLY }, /* read-write */
217 { "ro", 0, MS_RDONLY }, /* read-only */
218 { "exec", 1, MS_NOEXEC }, /* permit execution of binaries */
219 { "noexec", 0, MS_NOEXEC }, /* don't execute binaries */
220 { "suid", 1, MS_NOSUID }, /* honor suid executables */
221 { "nosuid", 0, MS_NOSUID }, /* don't honor suid executables */
222 { "dev", 1, MS_NODEV }, /* interpret device files */
223 { "nodev", 0, MS_NODEV }, /* don't interpret devices */
224 { "sync", 0, MS_SYNCHRONOUS}, /* synchronous I/O */
225 { "async", 1, MS_SYNCHRONOUS}, /* asynchronous I/O */
226 { "atime", 1, MS_NOATIME }, /* set file access time on read */
227 { "noatime", 0, MS_NOATIME }, /* do not set file access time on read */
229 { "diratime", 1, MS_NODIRATIME }, /* update directory access times */
230 { "nodiratime",0,MS_NODIRATIME }, /* do not update directory access times */
233 { "relatime", 0, MS_RELATIME }, /* update atime only relative to mtime/ctime */
234 { "norelatime",1,MS_RELATIME }, /* do not use relative atime updates */
236 { "auto", 0, 0 }, /* Can be mounted using -a */
237 { "noauto", 0, 0 }, /* Can only be mounted explicitly */
238 { "nousers", 1, 0 }, /* Forbid ordinary user to mount */
239 { "nouser", 1, 0 }, /* Forbid ordinary user to mount */
240 { "noowner", 1, 0 }, /* Device owner has no special privs */
241 { "_netdev", 0, 0 }, /* Device accessible only via network */
245 /****************************************************************************/
247 /* 1 = don't pass on to lustre
248 0 = pass on to lustre */
/*
 * Look one option string up in opt_map and update the mount flags.
 *
 * check - a single option, without surrounding commas
 * flagp - mount flag word; the mask of a matching entry is cleared from it
 *         in the branch visible here (presumably the inverted case, with
 *         the mask being set otherwise - TODO confirm, the other branch is
 *         not visible in this view)
 *
 * NOTE(review): the comparison is strncmp() limited to the length of the
 * table name, so this is a prefix match: any option that merely starts
 * with a table name (for instance an option beginning with rw or dev) is
 * treated as that table entry. The first matching row in table order wins.
 */
249 static int parse_one_option(const char *check, int *flagp)
251 const struct opt_map *opt;
253 for (opt = &opt_map[0]; opt->opt != NULL; opt++) {
254 if (strncmp(check, opt->opt, strlen(opt->opt)) == 0) {
257 *flagp &= ~(opt->mask);
264 /* Assume any unknown options are valid and pass them on. The mount
265 will fail if lmd_parse, ll_options or ldiskfs doesn't recognize it.*/
/*
 * Add the option string one to the comma-separated list in options.
 *
 * Both the separator and the option are added with strcat(), which does no
 * bounds checking: the caller must supply a NUL-terminated buffer with
 * room for the extra comma, the option text and the terminator.
 *
 * NOTE(review): the condition guarding the comma is not visible in this
 * view; presumably it is skipped while options is still empty.
 */
269 static void append_option(char *options, const char *one)
272 strcat(options, ",");
273 strcat(options, one);
276 /* Replace options with subset of Lustre-specific options, and
277 fill in mount flags */
/*
 * orig_options - comma-separated option list; split in place by strsep()
 *                and finally overwritten with the filtered list
 * flagp        - mount flag word, updated through parse_one_option()
 *
 * The filtered list is built in a zero-filled scratch buffer of
 * strlen(orig_options) + 1 bytes. Per option:
 *  - md_stripe_cache_size=<n> is converted with atoi() and stored in
 *    md_stripe_cache_size;
 *  - retry=<n> is converted with atoi(), stored in retry and compared
 *    against MAX_RETRIES;
 *  - mgssec=<...> is appended to the filtered list as is;
 *  - an option starting with force gets special handling;
 *  - anything else is handed to parse_one_option() and appended to the
 *    filtered list when that returns 0.
 *
 * NOTE(review): atoi() reports no errors, so a non-numeric value silently
 * becomes 0. The option tests use strncmp() with a fixed length, so they
 * are prefix matches. The assignment of arg, the handling of a failed
 * calloc() and the return value are not visible in this view.
 */
278 int parse_options(char *orig_options, int *flagp)
280 char *options, *opt, *nextopt, *arg, *val;
282 options = calloc(strlen(orig_options) + 1, 1);
284 nextopt = orig_options;
285 while ((opt = strsep(&nextopt, ","))) {
290 /* Handle retries in a slightly different
293 val = strchr(opt, '=');
294 /* please note that some ldiskfs mount options are also in the form
295 * of param=value. We should pay attention not to remove those
296 * mount options, see bug 22097. */
297 if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) {
298 md_stripe_cache_size = atoi(val + 1);
299 } else if (val && strncmp(arg, "retry", 5) == 0) {
300 retry = atoi(val + 1);
301 if (retry > MAX_RETRIES)
305 } else if (val && strncmp(arg, "mgssec", 6) == 0) {
306 append_option(options, opt);
307 } else if (strncmp(opt, "force", 5) == 0) {
308 //XXX special check for 'force' option
310 printf("force: %d\n", force);
311 } else if (parse_one_option(opt, flagp) == 0) {
312 /* pass this on as an option */
313 append_option(options, opt);
316 strcpy(orig_options, options);
/*
 * Read the first line of a text file.
 *
 * path - file to open for reading
 * buf  - receives at most size - 1 characters plus a terminating NUL, as
 *        stored by fgets(); a trailing newline, if read, is kept
 * size - capacity of buf in bytes
 *
 * A failed fgets() is reported on stderr together with strerror(errno).
 * The return values are not visible in this view; callers in this file
 * store the result in rc.
 *
 * NOTE(review): the error message has no trailing newline, and errno is
 * not necessarily set when fgets() fails only because of end of file.
 */
322 int read_file(char *path, char *buf, int size)
326 fd = fopen(path, "r");
330 /* should not ignore fgets(3)'s return value */
331 if (!fgets(buf, size, fd)) {
332 fprintf(stderr, "reading from %s: %s", path, strerror(errno));
/*
 * Open path for writing (mode w, so an existing regular file is truncated)
 * and, presumably, write the string buf to it - TODO confirm, only the
 * fopen() call is visible in this view. Callers in this file pass sysfs
 * attribute paths and store the result in rc.
 */
340 int write_file(char *path, char *buf)
344 fd = fopen(path, "w");
353 /* This is to tune the kernel for good SCSI performance.
354 * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
355 * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */
/*
 * source - device path given to mount
 *
 * Outline of the visible steps:
 *  1. Resolve source with realpath(); a failure only produces a warning.
 *  2. Paths starting with /dev/loop, and non-absolute names containing a
 *     comma or colon, are special-cased (the action taken is not visible
 *     in this view; presumably nothing is tuned for them).
 *  3. Build /sys/block plus the resolved path without its first four
 *     characters (the /dev part) and test it with access().
 *  4. If that is not usable, walk back over the trailing digits of the
 *     resolved path, stat() the device and compare its major and minor
 *     numbers with the major:minor text in every /sys/block entry dev file
 *     found by glob(). The matching entry directory becomes path.
 *  5. For /dev/md devices, read a sysfs attribute under path and, unless
 *     the current value is already at least md_stripe_cache_size, write
 *     md_stripe_cache_size to it. Raid and disk tunables differ, so the md
 *     case ends there.
 *  6. Otherwise read queue/max_hw_sectors_kb and write the same text to
 *     queue/max_sectors_kb.
 *
 * All problems are reported as warnings on stderr. The return values are
 * not visible in this view; main() treats a non-zero result as failure to
 * set the tunables.
 *
 * NOTE(review): strlen(buf) - 1 > 0 is evaluated in size_t, so it is also
 * true for an empty buf (the subtraction wraps around) and false only for
 * a buf of length 1. Likewise buf[strlen(buf) - 1] indexes out of bounds
 * when buf is empty. The result of the second strtok_r() call is not
 * visible here; chk_major and chk_minor are passed to atoi() as is.
 */
356 int set_blockdev_tunables(char *source)
359 struct stat stat_buf;
360 char *chk_major, *chk_minor;
363 char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
364 char real_path[PATH_MAX] = {'\0'};
371 ret_path = realpath(source, real_path);
372 if (ret_path == NULL) {
374 fprintf(stderr, "warning: %s: cannot resolve: %s\n",
375 source, strerror(errno));
379 if (strncmp(real_path, "/dev/loop", 9) == 0)
382 if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL))
385 snprintf(path, sizeof(path), "/sys/block%s", real_path + 4);
386 if (access(path, X_OK) == 0)
389 /* The name of the device say 'X' specified in /dev/X may not
390 * match any entry under /sys/block/. In that case we need to
391 * match the major/minor number to find the entry under
392 * sys/block corresponding to /dev/X */
393 dev = real_path + strlen(real_path);
394 while (--dev > real_path && isdigit(*dev))
397 if (strncmp(real_path, "/dev/md_", 8) == 0)
400 rc = stat(real_path, &stat_buf);
403 fprintf(stderr, "warning: %s, device %s stat failed\n",
404 strerror(errno), real_path);
408 major = major(stat_buf.st_rdev);
409 minor = minor(stat_buf.st_rdev);
410 rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
413 fprintf(stderr, "warning: failed to read entries under "
418 for (i = 0; i < glob_info.gl_pathc; i++){
419 snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]);
421 rc = read_file(path, buf, sizeof(buf));
425 if (buf[strlen(buf) - 1] == '\n')
426 buf[strlen(buf) - 1] = '\0';
428 chk_major = strtok_r(buf, ":", &savept);
430 if (major == atoi(chk_major) &&minor == atoi(chk_minor))
434 if (i == glob_info.gl_pathc) {
436 fprintf(stderr,"warning: device %s does not match any "
437 "entry under /sys/block\n", real_path);
438 globfree(&glob_info);
442 /* Chop off "/dev" from path we found */
443 path[strlen(glob_info.gl_pathv[i])] = '\0';
444 globfree(&glob_info);
447 if (strncmp(real_path, "/dev/md", 7) == 0) {
448 snprintf(real_path, sizeof(real_path), "%s/%s", path,
451 rc = read_file(real_path, buf, sizeof(buf));
454 fprintf(stderr, "warning: opening %s: %s\n",
455 real_path, strerror(errno));
459 if (atoi(buf) >= md_stripe_cache_size)
462 if (strlen(buf) - 1 > 0) {
463 snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size);
464 rc = write_file(real_path, buf);
466 fprintf(stderr, "warning: opening %s: %s\n",
467 real_path, strerror(errno));
469 /* Return since raid and disk tunables are different */
473 snprintf(real_path, sizeof(real_path), "%s/%s", path,
474 MAX_HW_SECTORS_KB_PATH);
475 rc = read_file(real_path, buf, sizeof(buf));
478 fprintf(stderr, "warning: opening %s: %s\n",
479 real_path, strerror(errno));
483 if (strlen(buf) - 1 > 0) {
484 snprintf(real_path, sizeof(real_path), "%s/%s", path,
485 MAX_SECTORS_KB_PATH);
486 rc = write_file(real_path, buf);
488 fprintf(stderr, "warning: writing to %s: %s\n",
489 real_path, strerror(errno));
494 int main(int argc, char *const argv[])
496 char default_options[] = "";
497 char *usource, *source, *target, *ptr;
498 char *options, *optcopy, *orig_options = default_options;
499 int i, nargs = 3, opt, rc, flags, optlen;
500 static struct option long_opt[] = {
504 {"nomtab", 0, 0, 'n'},
505 {"options", 1, 0, 'o'},
506 {"verbose", 0, 0, 'v'},
510 progname = strrchr(argv[0], '/');
511 progname = progname ? progname + 1 : argv[0];
513 while ((opt = getopt_long(argc, argv, "fhno:v",
514 long_opt, NULL)) != EOF){
518 printf("force: %d\n", force);
523 printf("fake: %d\n", fake);
531 printf("nomtab: %d\n", nomtab);
535 orig_options = optarg;
543 fprintf(stderr, "%s: unknown option '%c'\n",
550 if (optind + 2 > argc) {
551 fprintf(stderr, "%s: too few arguments\n", progname);
555 usource = argv[optind];
560 source = convert_hostnames(usource);
565 target = argv[optind + 1];
566 ptr = target + strlen(target) - 1;
567 while ((ptr > target) && (*ptr == '/')) {
573 for (i = 0; i < argc; i++)
574 printf("arg[%d] = %s\n", i, argv[i]);
575 printf("source = %s (%s), target = %s\n", usource, source,
577 printf("options = %s\n", orig_options);
580 options = malloc(strlen(orig_options) + 1);
581 if (options == NULL) {
582 fprintf(stderr, "can't allocate memory for options\n");
585 strcpy(options, orig_options);
586 rc = parse_options(options, &flags);
588 fprintf(stderr, "%s: can't parse options: %s\n",
594 rc = check_mtab_entry(usource, source, target, "lustre");
595 if (rc && !(flags & MS_REMOUNT)) {
596 fprintf(stderr, "%s: according to %s %s is "
597 "already mounted on %s\n",
598 progname, MOUNTED, usource, target);
601 if (!rc && (flags & MS_REMOUNT)) {
602 fprintf(stderr, "%s: according to %s %s is "
603 "not already mounted on %s\n",
604 progname, MOUNTED, usource, target);
608 if (flags & MS_REMOUNT)
611 rc = access(target, F_OK);
614 fprintf(stderr, "%s: %s inaccessible: %s\n", progname, target,
619 /* In Linux 2.4, the target device doesn't get passed to any of our
620 functions. So we'll stick it on the end of the options. */
621 optlen = strlen(options) + strlen(",device=") + strlen(source) + 1;
622 optcopy = malloc(optlen);
623 if (optcopy == NULL) {
624 fprintf(stderr, "can't allocate memory to optcopy\n");
627 strcpy(optcopy, options);
629 strcat(optcopy, ",");
630 strcat(optcopy, "device=");
631 strcat(optcopy, source);
634 printf("mounting device %s at %s, flags=%#x options=%s\n",
635 source, target, flags, optcopy);
637 if (!strstr(usource, ":/") && set_blockdev_tunables(source)) {
639 fprintf(stderr, "%s: unable to set tunables for %s"
640 " (may cause reduced IO performance)\n",
644 register_service_tags(usource, source, target);
647 /* flags and target get to lustre_get_sb, but not
648 lustre_fill_super. Lustre ignores the flags, but mount
650 for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) {
651 rc = mount(source, target, "lustre", flags,
655 fprintf(stderr, "%s: mount %s at %s "
656 "failed: %s retries left: "
657 "%d\n", basename(progname),
659 strerror(errno), retry-i);
663 sleep(1 << max((i/2), 5));
677 cli = strrchr(usource, ':');
678 if (cli && (strlen(cli) > 2))
683 fprintf(stderr, "%s: mount %s at %s failed: %s\n", progname,
684 usource, target, strerror(errno));
686 fprintf(stderr, "Are the lustre modules loaded?\n"
687 "Check /etc/modprobe.conf and /proc/filesystems"
688 "\nNote 'alias lustre llite' should be removed"
689 " from modprobe.conf\n");
690 if (errno == ENOTBLK)
691 fprintf(stderr, "Do you need -o loop?\n");
692 if (errno == ENOMEDIUM)
694 "This filesystem needs at least 1 OST\n");
695 if (errno == ENOENT) {
696 fprintf(stderr, "Is the MGS specification correct?\n");
697 fprintf(stderr, "Is the filesystem name correct?\n");
698 fprintf(stderr, "If upgrading, is the copied client log"
699 " valid? (see upgrade docs)\n");
701 if (errno == EALREADY)
702 fprintf(stderr, "The target service is already running."
705 fprintf(stderr, "The target service failed to start "
706 "(bad config log?) (%s). "
707 "See /var/log/messages.\n", usource);
709 fprintf(stderr, "Is the MGS running?\n");
710 if (errno == EADDRINUSE)
711 fprintf(stderr, "The target service's index is already "
712 "in use. (%s)\n", usource);
713 if (errno == EINVAL) {
714 fprintf(stderr, "This may have multiple causes.\n");
716 fprintf(stderr, "Is '%s' the correct filesystem"
718 fprintf(stderr, "Are the mount options correct?\n");
719 fprintf(stderr, "Check the syslog for more info.\n");
722 /* May as well try to clean up loop devs */
723 if (strncmp(usource, "/dev/loop", 9) == 0) {
726 sprintf(cmd, "/sbin/losetup -d %s", usource);
727 if ((ret = system(cmd)) < 0)
730 rc = WEXITSTATUS(ret);
733 } else if (!nomtab) {
734 rc = update_mtab_entry(usource, target, "lustre", orig_options,