1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
5 * Author: Lin Song Tao <lincent@clusterfs.com>
6 * Author: Nathan Rutman <nathan@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 #include <sys/types.h>
34 #include <sys/mount.h>
39 #include <linux/types.h>
41 #include <linux/fs.h> // for BLKGETSIZE64
42 #include <linux/lustre_disk.h>
43 #include <lnet/lnetctl.h>
46 /* So obd.o will link */
48 command_t cmdlist[] = {
/* Upper bound on the loop device indices probed by loop_setup(). */
52 #define MAX_LOOP_DEVICES 16
/* Backing filesystem block size in bytes: handed to mkfs with -b and
   used to convert a requested size in KB into a block count. */
53 #define L_BLOCK_SIZE 4096
/* Program name used as the prefix of diagnostics; main() derives it
   from argv[0]. */
55 static char *progname;
/* Output level; the vprint macro only prints while this is above 0. */
56 static int verbose = 1;
/* Off by default. Presumably switched on by --print; TODO confirm
   against the option parser. */
57 static int print_only = 0;
/* Help output: the first call prints the one-line synopsis to the stream
   out with progname substituted; the adjacent string literals that follow
   make up the per-option help text. */
62 fprintf(out, "usage: %s <target types> [options] <device>\n", progname);
65 "\t<device>:block device or file (e.g /dev/sda or /tmp/ost1)\n"
67 "\t\t--ost: object storage, mutually exclusive with mdt\n"
68 "\t\t--mdt: metadata storage, mutually exclusive with ost\n"
69 "\t\t--mgs: configuration management service - one per site\n"
71 "\t\t--mgsnid=<nid>[,<...>] : NID(s) of a remote mgs node\n"
72 "\t\t\trequired for all targets other than the mgs node\n"
73 "\t\t--fsname=<filesystem_name> : default is 'lustre'\n"
74 #if 0 /* FIXME implement 1.6.x */
75 "\t\t--configdev=<altdevice|file>: store configuration info\n"
76 "\t\t\tfor this device on an alternate device\n"
78 "\t\t--failover=<nid>[,<...>] : list of NIDs for the failover\n"
79 "\t\t\tpartners for this target\n"
80 "\t\t--backfstype=<fstype> : backing fs type (ext3, ldiskfs)\n"
81 "\t\t--device-size=#N(KB) : device size for loop devices\n"
82 "\t\t--stripe-count=#N : default number of stripes\n"
83 "\t\t--stripe-size=#N(KB) : default stripe size\n"
84 "\t\t--index=#N : target index\n"
85 "\t\t--mountfsoptions=<opts> : permanent mount options\n"
87 "\t\t--mkfsoptions=<opts> : format options\n"
88 "\t\t--reformat: overwrite an existing disk\n"
90 "\t\t--nomgs: turn off MGS service on this MDT\n"
92 "\t\t--print: just report what we would do; don't write to "
94 "\t\t--timeout=<secs> : system timeout period\n"
/* printf that only fires while verbose is above 0. The macro expands to a
   bare if statement, so using it as the body of an outer if that has an
   else will bind that else to the macro's if instead. */
100 #define vprint if (verbose > 0) printf
/* Start a fatal diagnostic: writes a newline, the program name and the
   FATAL tag to stderr. The message text itself is left to the caller. */
102 static void fatal(void)
105 fprintf(stderr, "\n%s FATAL: ", progname);
108 /*================ utility functions =====================*/
/* Extract the major number from a 64-bit device number: bits 8..19 supply
   the low 12 bits of the result and the bits of (dev >> 32) above bit 11
   supply the rest. The layout appears to mirror glibc gnu_dev_major();
   confirm before relying on that. */
111 dev_major (unsigned long long int __dev)
113 return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff);
/* Extract the minor number from a 64-bit device number: bits 0..7 supply
   the low 8 bits of the result and the bits of (dev >> 12) above bit 7
   supply the rest. The layout appears to mirror glibc gnu_dev_minor();
   confirm before relying on that. */
117 dev_minor (unsigned long long int __dev)
119 return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff);
/* Kernel version probe. The static below keeps its value between calls,
   presumably so the file is only read once; TODO confirm. */
124 static int version = 0;
/* Exactly four bytes are read and compared, so this buffer is never
   treated as a NUL-terminated string by the code shown. */
128 char release[4] = "";
130 fd = open("/proc/sys/kernel/osrelease", O_RDONLY);
/* Failure to open is only a warning; the message says 2.6 is assumed. */
132 fprintf(stderr, "%s: Warning: Can't resolve kernel "
133 "version, assuming 2.6\n", progname);
/* NOTE(review): the result of read() is not examined, so a short or
   failed read leaves release partly or wholly empty. */
135 read(fd, release, 4);
/* A 2.4 kernel is recognised by the first four characters only. */
138 if (strncmp(release, "2.4.", 4) == 0)
/* Run a shell command line held in cmd.
   cmd is modified in place: the stderr redirection is appended to it, so
   the caller's buffer needs room for five more characters plus the
   terminator. */
146 int run_command(char *cmd)
151 printf("cmd: %s\n", cmd);
153 strcat(cmd, " 2>&1");
160 /*============ disk dev functions ===================*/
162 /* Setup a file in the first unused loop_device */
/* Probes loop devices 0 .. MAX_LOOP_DEVICES-1 with losetup, attaches
   mop->mo_device to one of them and records the chosen device node in
   mop->mo_loopdev. */
163 int loop_setup(struct mkfs_opts *mop)
169 /* Figure out the loop device names */
/* Two naming schemes are tried: /dev/loopN and /dev/loop/N. The explicit
   \0 inside the literals is redundant since strcpy stops at the first
   terminator. */
170 if (!access("/dev/loop0", F_OK | R_OK))
171 strcpy(loop_base, "/dev/loop\0");
172 else if (!access("/dev/loop/0", F_OK | R_OK))
173 strcpy(loop_base, "/dev/loop/\0");
175 fprintf(stderr, "%s: can't access loop devices\n", progname);
179 /* Find unused loop device */
180 for (i = 0; i < MAX_LOOP_DEVICES; i++) {
182 sprintf(l_device, "%s%d", loop_base, i);
183 if (access(l_device, F_OK | R_OK))
/* Query the device; output is discarded, only the status matters. */
185 sprintf(cmd, "losetup %s > /dev/null", l_device);
186 ret = run_command(cmd);
187 /* losetup gets 1 (ret=256) for non-set-up device */
189 /* Set up a loopback device to our file */
190 sprintf(cmd, "losetup %s %s", l_device, mop->mo_device);
191 ret = run_command(cmd);
/* NOTE(review): ret is the status handed back by run_command, which per
   the comment above is not an errno value, so the strerror() text in
   this message is unlikely to be meaningful. */
193 fprintf(stderr, "%s: error %d on losetup: %s\n",
194 progname, ret, strerror(ret));
/* Remember which loop device now fronts the file. */
197 strcpy(mop->mo_loopdev, l_device);
202 fprintf(stderr, "%s: out of loop devices!\n", progname);
/* Detach the loop device recorded in mop->mo_loopdev with losetup -d.
   Nothing is run unless the target is flagged MO_IS_LOOP and a loop
   device name has actually been stored. */
206 int loop_cleanup(struct mkfs_opts *mop)
210 if ((mop->mo_flags & MO_IS_LOOP) && *mop->mo_loopdev) {
211 sprintf(cmd, "losetup -d %s", mop->mo_loopdev);
212 ret = run_command(cmd);
217 /* Determine if a device is a block device (as opposed to a file) */
/* The final result is S_ISBLK() of the stat mode, i.e. nonzero for a
   block device. */
218 int is_block(char* devname)
/* Existence is checked first, before stat is attempted. */
223 ret = access(devname, F_OK);
226 ret = stat(devname, &st);
228 fprintf(stderr, "%s: cannot stat %s\n", progname, devname);
231 return S_ISBLK(st.st_mode);
/* Ask the kernel for the size of a block device.
   The BLKGETSIZE64 ioctl reports bytes; the verbose message shows that
   figure in MB and, per the comment at the end, the function hands the
   size back in KB. */
234 __u64 get_device_size(char* device)
239 fd = open(device, O_RDONLY);
241 fprintf(stderr, "%s: cannot open %s: %s\n",
242 progname, device, strerror(errno));
246 /* size in bytes. bz5831 */
247 ret = ioctl(fd, BLKGETSIZE64, (void*)&size);
250 fprintf(stderr, "%s: size ioctl failed: %s\n",
251 progname, strerror(errno));
255 vprint("device size = "LPU64"MB\n", size >> 20);
256 /* return value in KB */
/* Create the regular file that backs a loop device and set its length.
   mop->mo_device_sz is in KB, hence the multiplication by 1024 in the
   truncate() call. */
260 int loop_format(struct mkfs_opts *mop)
/* A zero size is reported as an error. */
264 if (mop->mo_device_sz == 0) {
266 fprintf(stderr, "loop device requires a --device-size= "
/* NOTE(review): the descriptor returned by creat() is overwritten by the
   truncate() result on the very next line and is not closed here. */
271 ret = creat(mop->mo_device, S_IRUSR|S_IWUSR);
272 ret = truncate(mop->mo_device, mop->mo_device_sz * 1024);
275 fprintf(stderr, "%s: Unable to create backing store: %d\n",
282 /* Check whether the file exists in the device */
/* Works by running debugfs stat on the unmounted device through popen
   and scanning its output. is_lustre_target() treats a nonzero result
   as found. */
283 static int file_in_dev(char *file_name, char *dev_name)
286 char debugfs_cmd[256];
287 unsigned int inode_num;
290 /* Construct debugfs command line. */
291 memset(debugfs_cmd, 0, sizeof(debugfs_cmd));
/* NOTE(review): both names are pasted into a shell command line without
   quoting or escaping. */
293 "debugfs -c -R 'stat %s' %s 2>&1 | egrep '(Inode|unsupported)'",
294 file_name, dev_name);
296 fp = popen(debugfs_cmd, "r");
298 fprintf(stderr, "%s: %s\n", progname, strerror(errno));
/* An Inode line in the filtered output means debugfs found the file. */
302 if (fscanf(fp, "Inode: %u", &inode_num) == 1) { /* exist */
/* Otherwise the command buffer is reused to hold whatever output is
   left, so it can be echoed in verbose mode. */
306 i = fread(debugfs_cmd, 1, sizeof(debugfs_cmd), fp);
308 /* Filesystem has unsupported feature */
309 vprint("%.*s", i, debugfs_cmd);
310 /* in all likelihood, the "unsupported feature" is
311 'extents', which older debugfs does not understand.
312 Use e2fsprogs-1.38-cfs1 or later, available from
313 ftp://ftp.lustre.org/pub/lustre/other/e2fsprogs/ */
320 /* Check whether the device has already been formatted by mkfs.lustre */
/* The three probes are chained with ||, so the search stops at the first
   file that file_in_dev() reports with a nonzero value and rc keeps that
   value. */
321 static int is_lustre_target(struct mkfs_opts *mop)
324 /* Check whether there exist MOUNT_DATA_FILE,
325 LAST_RCVD or CATLIST in the device. */
326 vprint("checking for existing Lustre data\n");
328 if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))
329 || (rc = file_in_dev(LAST_RCVD, mop->mo_device))
330 || (rc = file_in_dev(CATLIST, mop->mo_device))) {
331 vprint("found Lustre data\n");
332 /* in the -1 case, 'extents' means this really IS a lustre
337 return 0; /* The device is not a lustre target. */
340 /* Build fs according to type */
/* Assembles a mkfs command line from the mount type, the user supplied
   mkfs options and a set of defaults, then executes it with
   run_command(). Defaults are appended to mop->mo_mkfsopts only when the
   matching switch is not already present in that string.
   NOTE(review): the strcat calls on mo_mkfsopts and mkfs_cmd are not
   bounds checked; buffer sizes are declared elsewhere. */
341 int make_lustre_backfs(struct mkfs_opts *mop)
/* An explicit size (in KB) is converted to a count of L_BLOCK_SIZE
   blocks. NOTE(review): the threshold is 8096 while the message says
   8MB, which would be 8192 KB. */
349 if (mop->mo_device_sz != 0) {
350 if (mop->mo_device_sz < 8096){
351 fprintf(stderr, "%s: size of filesystem must be larger "
352 "than 8MB, but is set to %lldKB\n",
353 progname, mop->mo_device_sz);
356 block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10);
359 if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
360 (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
361 __u64 device_sz = mop->mo_device_sz;
363 /* we really need the size */
364 if (device_sz == 0) {
365 device_sz = get_device_size(mop->mo_device);
370 /* Journal size in MB */
/* The strstr tests in this function match the switch anywhere in the
   option string, not only as a separate word. */
371 if (strstr(mop->mo_mkfsopts, "-J") == NULL) {
372 /* Choose our own default journal size */
373 long journal_sz = 0, max_sz;
/* 4 MB of journal per 102400 KB of device, for devices above 1 GB. */
374 if (device_sz > 1024 * 1024) /* 1GB */
375 journal_sz = (device_sz / 102400) * 4;
377 max_sz = (102400 * L_BLOCK_SIZE) >> 20; /* 400MB */
378 if (journal_sz > max_sz)
381 sprintf(buf, " -J size=%ld", journal_sz);
382 strcat(mop->mo_mkfsopts, buf);
386 /* Default bytes_per_inode is block size */
387 if (strstr(mop->mo_mkfsopts, "-i") == NULL) {
388 long bytes_per_inode = 0;
390 if (IS_MDT(&mop->mo_ldd))
391 bytes_per_inode = 4096;
393 /* Allocate fewer inodes on large OST devices. Most
394 filesystems can be much more aggressive than even
396 if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000)))
397 bytes_per_inode = 16384;
399 if (bytes_per_inode > 0) {
400 sprintf(buf, " -i %ld", bytes_per_inode);
401 strcat(mop->mo_mkfsopts, buf);
405 /* This is an undocumented mke2fs option. Default is 128. */
406 if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
/* On an MDT the inode size is stepped up with the default stripe count;
   the thresholds tested are 77, 34 and 13. */
408 if (IS_MDT(&mop->mo_ldd)) {
409 if (mop->mo_ldd.ldd_stripe_count > 77)
410 inode_size = 512; /* bz 7241 */
411 else if (mop->mo_ldd.ldd_stripe_count > 34)
413 else if (mop->mo_ldd.ldd_stripe_count > 13)
417 } else if (IS_OST(&mop->mo_ldd)) {
418 /* now as we store fids in EA on OST we need
419 to make inode bigger */
423 if (inode_size > 0) {
424 sprintf(buf, " -I %ld", inode_size);
425 strcat(mop->mo_mkfsopts, buf);
431 strcat(mop->mo_mkfsopts, " -q");
434 /* Enable hashed b-tree directory lookup in large dirs bz6224 */
435 if (strstr(mop->mo_mkfsopts, "-O") == NULL) {
436 strcat(mop->mo_mkfsopts, " -O dir_index");
/* ext3 and ldiskfs targets are both formatted with mkfs.ext2 -j, using
   the server name as the filesystem label. */
439 sprintf(mkfs_cmd, "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE,
440 mop->mo_ldd.ldd_svname);
442 } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) {
/* journal_sz is fixed at 0 here, so the branch below cannot run as the
   code stands. */
443 long journal_sz = 0; /* FIXME default journal size */
444 if (journal_sz > 0) {
445 sprintf(buf, " --journal_size %ld", journal_sz);
446 strcat(mop->mo_mkfsopts, buf);
448 sprintf(mkfs_cmd, "mkreiserfs -ff ");
451 fprintf(stderr,"%s: unsupported fs type: %d (%s)\n",
452 progname, mop->mo_ldd.ldd_mount_type,
453 MT_STR(&mop->mo_ldd));
/* Format through the loop device when the target is a regular file. */
458 dev = mop->mo_device;
459 if (mop->mo_flags & MO_IS_LOOP)
460 dev = mop->mo_loopdev;
462 vprint("formatting backing filesystem %s on %s\n",
463 MT_STR(&mop->mo_ldd), dev);
464 vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
465 vprint("\t4k blocks %d\n", block_count);
466 vprint("\toptions %s\n", mop->mo_mkfsopts);
/* Final command: program and fixed switches, then the accumulated
   options, the device, and the block count if one was computed. */
468 /* mkfs_cmd's trailing space is important! */
469 strcat(mkfs_cmd, mop->mo_mkfsopts);
470 strcat(mkfs_cmd, " ");
471 strcat(mkfs_cmd, dev);
472 if (block_count != 0) {
473 sprintf(buf, " %d", block_count);
474 strcat(mkfs_cmd, buf);
477 vprint("mkfs_cmd = %s\n", mkfs_cmd);
478 ret = run_command(mkfs_cmd);
481 fprintf(stderr, "Unable to build fs %s (%d)\n", dev, ret);
489 /* ==================== Lustre config functions =============*/
/* Dump the fields of a lustre_disk_data record to stdout under the
   heading str: names, index, UUID, mount type, raw flags plus a decoded
   flag list, mount options, and the MGS and failover NID lists. */
491 void print_ldd(char *str, struct lustre_disk_data *ldd)
494 printf("\n %s:\n", str);
495 printf("Target: %s\n", ldd->ldd_svname);
496 printf("Index: %d\n", ldd->ldd_svindex);
497 printf("UUID: %s\n", (char *)ldd->ldd_uuid);
498 printf("Lustre FS: %s\n", ldd->ldd_fsname);
499 printf("Mount type: %s\n", MT_STR(ldd));
500 printf("Flags: %#x\n", ldd->ldd_flags);
/* One word per flag that is set; unset flags print as empty strings. */
501 printf(" (%s%s%s%s%s%s)\n",
502 IS_MDT(ldd) ? "MDT ":"",
503 IS_OST(ldd) ? "OST ":"",
504 IS_MGS(ldd) ? "MGS ":"",
505 ldd->ldd_flags & LDD_F_NEED_INDEX ? "needs_index ":"",
506 ldd->ldd_flags & LDD_F_NEED_REGISTER ? "must_register ":"",
507 ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":"");
508 printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts);
509 printf("MGS nids: ");
/* NID lists are comma separated; the first entry gets a space in place
   of the comma. */
510 for (i = 0; i < ldd->ldd_mgsnid_count; i++) {
511 printf("%c %s", (i == 0) ? ' ' : ',',
512 libcfs_nid2str(ldd->ldd_mgsnid[i]));
514 printf("\nFailover nids: ");
515 for (i = 0; i < ldd->ldd_failnid_count; i++) {
516 printf("%c %s", (i == 0) ? ' ' : ',',
517 libcfs_nid2str(ldd->ldd_failnid[i]));
523 /* Write the server config files */
/* Mounts the backing device on a temporary directory, creates the
   configs directory, writes mop->mo_ldd to MOUNT_DATA_FILE and, for an
   upgraded MGS, copies the old logs to their new names with cp. */
524 int write_local_files(struct mkfs_opts *mop)
/* Template for mkdtemp(), which replaces the XXXXXX in place. */
526 char mntpt[] = "/tmp/mntXXXXXX";
532 /* Mount this device temporarily in order to write these files */
533 vprint("mounting backing device\n");
534 if (!mkdtemp(mntpt)) {
535 fprintf(stderr, "%s: Can't create temp mount point %s: %s\n",
536 progname, mntpt, strerror(errno));
540 dev = mop->mo_device;
541 if (mop->mo_flags & MO_IS_LOOP)
542 dev = mop->mo_loopdev;
544 ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL);
/* The message names mo_device even when the loop device was mounted,
   and prints ret, not errno. */
546 fprintf(stderr, "%s: Unable to mount %s: %d\n",
547 progname, mop->mo_device, ret);
551 /* Set up initial directories */
552 sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR);
553 ret = mkdir(filepnm, 0777);
/* An already existing directory is not treated as a failure.
   NOTE(review): the else branch tests errno without looking at ret, so
   a stale EEXIST left by an earlier call could be picked up after a
   successful mkdir. */
554 if ((ret != 0) && (errno != EEXIST)) {
555 fprintf(stderr, "%s: Can't make configs dir %s (%d)\n",
556 progname, filepnm, ret);
558 } else if (errno == EEXIST) {
562 /* Save the persistent mount data into a file. Lustre must pre-read
563 this file to get the real mount options. */
564 vprint("Writing %s\n", MOUNT_DATA_FILE);
565 sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
566 filep = fopen(filepnm, "w");
568 fprintf(stderr, "%s: Unable to create %s file\n",
/* The structure is written as one raw binary record.
   NOTE(review): the fwrite result is not examined. */
572 fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
577 /* Check for upgrade */
/* Both flags must be set: an upgraded target that also hosts the MGS. */
578 if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS))
579 == (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) {
582 vprint("Copying old logs\n");
583 /* Copy the old client log to fsname-client */
584 sprintf(filepnm, "%s/%s/%s-client",
585 mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname);
586 sprintf(cmd, "cp %s/%s/client %s", mntpt, MDT_LOGS_DIR,
589 printf("cmd: %s\n", cmd);
590 ret = run_command(cmd);
/* On failure the user is told how to perform the copy by hand. */
592 fprintf(stderr, "%s: Can't copy 1.4 config %s/client "
593 "(%d)\n", progname, MDT_LOGS_DIR, ret);
594 fprintf(stderr, "mount -t ext3 %s somewhere, "
595 "find the client log for fs %s and "
596 "copy it manually into %s/%s-client, "
599 mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR,
600 mop->mo_ldd.ldd_fsname);
603 /* Copy the old mdt log to fsname-MDT0000 (get old
604 name from mdt_UUID) */
/* filepnm is reused as scratch space for the UUID string; term points
   at the _UUID suffix if there is one. */
606 strcpy(filepnm, mop->mo_ldd.ldd_uuid);
607 term = strstr(filepnm, "_UUID");
610 sprintf(cmd, "cp %s/%s/%s %s/%s/%s",
611 mntpt, MDT_LOGS_DIR, filepnm,
612 mntpt, MOUNT_CONFIGS_DIR,
613 mop->mo_ldd.ldd_svname);
615 printf("cmd: %s\n", cmd);
616 ret = run_command(cmd);
619 fprintf(stderr, "%s: Can't copy 1.4 config %s/%s "
620 "(%d)\n", progname, MDT_LOGS_DIR, filepnm, ret);
621 fprintf(stderr, "mount -t ext3 %s somewhere, "
622 "find the MDT log for fs %s and "
623 "copy it manually into %s/%s, "
626 mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR,
627 mop->mo_ldd.ldd_svname);
636 vprint("unmounting backing device\n");
/* Load existing target configuration into mop->mo_ldd.
   The device is mounted on a temporary directory; MOUNT_DATA_FILE is
   read if it can be opened, otherwise the older last_rcvd file is read
   and the flags, index and UUID are derived from it, with
   LDD_F_UPGRADE14 added to mark the result as an upgrade. */
643 int read_local_files(struct mkfs_opts *mop)
/* Template for mkdtemp(), which replaces the XXXXXX in place. */
645 char mntpt[] = "/tmp/mntXXXXXX";
651 /* Mount this device temporarily in order to read these files */
652 vprint("mounting backing device\n");
653 if (!mkdtemp(mntpt)) {
654 fprintf(stderr, "%s: Can't create temp mount point %s: %s\n",
655 progname, mntpt, strerror(errno));
659 dev = mop->mo_device;
660 if (mop->mo_flags & MO_IS_LOOP)
661 dev = mop->mo_loopdev;
663 ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL);
/* NOTE(review): mount() reports failure by returning -1 and setting
   errno, so strerror(ret) will not describe the real error; errno looks
   like the intended argument. Confirm. */
665 fprintf(stderr, "%s: Unable to mount %s: %s\n",
666 progname, mop->mo_device, strerror(ret));
670 sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
671 filep = fopen(filepnm, "r");
/* The record is read back as one raw binary structure.
   NOTE(review): the fread result is not examined. */
673 vprint("Reading %s\n", MOUNT_DATA_FILE);
674 fread(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
677 /* Try to read pre-1.6 config from last_rcvd */
678 struct lr_server_data lsd;
679 fprintf(stderr, "%s: Unable to read %s, trying last_rcvd\n",
680 progname, MOUNT_DATA_FILE);
681 sprintf(filepnm, "%s/%s", mntpt, LAST_RCVD);
682 filep = fopen(filepnm, "r");
684 fprintf(stderr, "%s: Unable to read old data\n",
689 vprint("Reading %s\n", LAST_RCVD);
/* NOTE(review): the fread result is not examined here either. */
690 fread(&lsd, sizeof(lsd), 1, filep);
/* The compat bits in last_rcvd say which kind of server wrote it. */
692 if (lsd.lsd_feature_compat & OBD_COMPAT_OST) {
693 mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST;
694 mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index;
695 } else if (lsd.lsd_feature_compat & OBD_COMPAT_MDT) {
696 /* We must co-locate so mgs can see old logs.
697 If user doesn't want this, they can copy the old
698 logs manually and re-tunefs. */
699 mop->mo_ldd.ldd_flags =
700 LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS;
701 mop->mo_ldd.ldd_svindex = lsd.lsd_mdt_index;
703 /* If neither is set, we're pre-1.4.6, make a guess. */
/* The guess uses a nonzero OST index and the presence of the MDT logs
   directory as hints. */
704 sprintf(filepnm, "%s/%s", mntpt, MDT_LOGS_DIR);
705 if (lsd.lsd_ost_index > 0) {
706 mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST;
707 mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index;
709 if ((ret = access(filepnm, F_OK)) == 0) {
710 mop->mo_ldd.ldd_flags =
713 /* Old MDT's are always index 0
715 mop->mo_ldd.ldd_svindex = 0;
717 /* The index won't be correct */
718 mop->mo_ldd.ldd_flags =
719 LDD_F_SV_TYPE_OST | LDD_F_NEED_INDEX;
/* The UUID is carried over from last_rcvd and the target is marked as
   needing the 1.4 upgrade handling. */
724 memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid,
725 sizeof(mop->mo_ldd.ldd_uuid));
726 mop->mo_ldd.ldd_flags |= LDD_F_UPGRADE14;
732 vprint("unmounting backing device\n");
/* Fill mop->mo_ldd with the values used when no option overrides them:
   no MGS NIDs, filesystem name lustre, index -1 with the NEED_INDEX and
   NEED_REGISTER flags set, one stripe of 1 MB, stripe pattern 0. */
740 void set_defaults(struct mkfs_opts *mop)
742 mop->mo_ldd.ldd_magic = LDD_MAGIC;
743 mop->mo_ldd.ldd_config_ver = 1;
744 mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER;
745 mop->mo_ldd.ldd_mgsnid_count = 0;
746 strcpy(mop->mo_ldd.ldd_fsname, "lustre");
/* The backing filesystem type depends on the running kernel: ext3 when
   get_os_version() reports 24, ldiskfs presumably in every other case. */
747 if (get_os_version() == 24)
748 mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3;
750 mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS;
752 mop->mo_ldd.ldd_svindex = -1;
753 mop->mo_ldd.ldd_stripe_count = 1;
754 mop->mo_ldd.ldd_stripe_sz = 1024 * 1024;
755 mop->mo_ldd.ldd_stripe_pattern = 0;
/* Report on stderr that long option opt was used with a target type it
   does not apply to; type names the target types that accept it. */
758 static inline void badopt(const char *opt, char *type)
760 fprintf(stderr, "%s: '--%s' only valid for %s\n",
761 progname, opt, type);
/* Parse the command line with getopt_long() and store the results in mop.
   Several options test the target type flags at the moment they are
   parsed, so the target type options have to come earlier on the command
   line than the options that depend on them.
   NOTE(review): longidx is only written by getopt_long() when a long
   option matched, so the badopt() calls may name the wrong option when
   a short option was used. */
765 int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop,
/* Each row maps a long option to the short option letter handled in the
   switch. stripe-index and index share the letter i. */
768 static struct option long_opt[] = {
769 {"backfstype", 1, 0, 'b'},
770 {"configdev", 1, 0, 'C'},
771 {"device-size", 1, 0, 'd'},
772 {"fsname",1, 0, 'n'},
773 {"failover", 1, 0, 'f'},
777 {"mgsnid", 1, 0, 'm'},
778 {"mkfsoptions", 1, 0, 'k'},
779 {"mountfsoptions", 1, 0, 'o'},
780 {"nomgs", 0, 0, 'N'},
782 {"print", 0, 0, 'p'},
783 {"quiet", 0, 0, 'q'},
784 {"reformat", 0, 0, 'r'},
785 {"startupwait", 1, 0, 'w'},
786 {"stripe-count", 1, 0, 'c'},
787 {"stripe-size", 1, 0, 's'},
788 {"stripe-index", 1, 0, 'i'},
789 {"index", 1, 0, 'i'},
790 {"timeout", 1, 0, 't'},
791 {"verbose", 0, 0, 'v'},
794 char *optstring = "b:C:d:n:f:hI:MGm:k:No:Opqrw:c:s:i:t:v";
798 while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) !=
/* Backing filesystem type: the argument is matched against the name of
   every known mount type. */
803 while (i < LDD_MT_LAST) {
804 if (strcmp(optarg, mt_str(i)) == 0) {
805 mop->mo_ldd.ldd_mount_type = i;
/* Default stripe count: accepted on an MDT only and must be positive. */
813 if (IS_MDT(&mop->mo_ldd)) {
814 int stripe_count = atol(optarg);
815 if (stripe_count <= 0) {
816 fprintf(stderr, "%s: bad stripe count "
817 "%d\n", progname, stripe_count);
820 mop->mo_ldd.ldd_stripe_count = stripe_count;
822 badopt(long_opt[longidx].name, "MDT");
826 case 'C': /* Configdev */
828 printf("Configdev not implemented\n");
/* Device size; the usage text gives the unit as KB. */
831 mop->mo_device_sz = atol(optarg);
/* Failover NIDs: comma separated list, split in place by strsep(). */
835 char *s1 = optarg, *s2;
836 while ((s2 = strsep(&s1, ","))) {
837 mop->mo_ldd.ldd_failnid[i++] =
839 if (i >= MTI_NIDS_MAX) {
840 fprintf(stderr, "%s: too many failover "
841 "nids, ignoring %s...\n",
846 mop->mo_ldd.ldd_failnid_count = i;
850 mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGS;
/* Target index: needs an MDT or OST type already selected, and clears
   the NEED_INDEX flag once supplied. */
856 if (IS_MDT(&mop->mo_ldd) || IS_OST(&mop->mo_ldd)) {
857 mop->mo_ldd.ldd_svindex = atoi(optarg);
858 mop->mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX;
860 badopt(long_opt[longidx].name, "MDT,OST");
/* strncpy() stops one byte short of the buffer size; the result is only
   terminated if that last byte is already zero, which holds for the mop
   that main() clears with memset. */
865 strncpy(mop->mo_mkfsopts, optarg,
866 sizeof(mop->mo_mkfsopts) - 1);
/* MGS NIDs: rejected when this target is itself flagged as the MGS;
   otherwise parsed the same way as the failover list. */
870 char *s1 = optarg, *s2;
871 if (IS_MGS(&mop->mo_ldd)) {
872 badopt(long_opt[longidx].name,
876 while ((s2 = strsep(&s1, ","))) {
877 mop->mo_ldd.ldd_mgsnid[i++] =
879 if (i >= MTI_NIDS_MAX) {
880 fprintf(stderr, "%s: too many MGS nids,"
886 mop->mo_ldd.ldd_mgsnid_count = i;
890 mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MDT;
/* Filesystem name: needs an MDT or OST type already selected and is
   limited to 8 characters. */
893 if (!(IS_MDT(&mop->mo_ldd) || IS_OST(&mop->mo_ldd))) {
894 badopt(long_opt[longidx].name, "MDT,OST");
897 if (strlen(optarg) > 8) {
898 fprintf(stderr, "%s: filesystem name must be "
899 "<= 8 chars\n", progname);
903 strncpy(mop->mo_ldd.ldd_fsname, optarg,
904 sizeof(mop->mo_ldd.ldd_fsname) - 1);
907 mop->mo_ldd.ldd_flags &= ~LDD_F_SV_TYPE_MGS;
913 mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_OST;
922 mop->mo_flags |= MO_FORCEFORMAT;
/* Stripe size is given in KB and stored in bytes; MDT only. */
925 if (IS_MDT(&mop->mo_ldd)) {
926 mop->mo_ldd.ldd_stripe_sz = atol(optarg) * 1024;
928 badopt(long_opt[longidx].name, "MDT");
933 mop->mo_ldd.ldd_timeout = atol(optarg);
941 fprintf(stderr, "Unknown option '%c'\n", opt);
/* At least one non-option argument must remain after the options. */
947 if (optind >= argc) {
949 fprintf(stderr, "Bad arguments\n");
/* Entry point shared by the mkfs and tunefs builds; the TUNEFS macro
   selects between them in the conditional sections below. The steps
   shown are: identify the device, read existing data from disk, parse
   the options, validate the target type and MGS settings, build the
   permanent mount options, then format the device and write the
   configuration files. */
957 int main(int argc, char *const argv[])
959 struct mkfs_opts mop;
/* NOTE(review): mountopts starts as NULL and is handed to parse_opts()
   by value. Whether user supplied mount options can come back through
   it depends on the fourth parameter of parse_opts(), which is declared
   outside this view; confirm. */
960 char *mountopts = NULL;
961 char default_mountopts[1024] = "";
/* progname is the part of argv[0] after the last slash, when one is
   present. */
964 if ((progname = strrchr(argv[0], '/')) != NULL)
975 memset(&mop, 0, sizeof(mop));
978 /* device is last arg */
/* NOTE(review): unbounded copy of a command line argument into
   mop.mo_device. */
979 strcpy(mop.mo_device, argv[argc - 1]);
980 /* Are we using a loop device? */
981 ret = is_block(mop.mo_device);
985 mop.mo_flags |= MO_IS_LOOP;
988 /* For tunefs, we must read in the old values before parsing any
990 /* Create the loopback file */
991 if (mop.mo_flags & MO_IS_LOOP) {
992 ret = access(mop.mo_device, F_OK);
994 ret = loop_setup(&mop);
997 fprintf(stderr, "Loop device setup for %s failed: %s\n",
998 mop.mo_device, strerror(ret));
1003 /* Check whether the disk has already been formatted by mkfs.lustre */
1004 ret = is_lustre_target(&mop);
1007 fprintf(stderr, "Device %s has not been formatted with "
1008 "mkfs.lustre\n", mop.mo_device);
1012 ret = read_local_files(&mop);
1015 fprintf(stderr, "Failed to read previous Lustre data from %s\n",
1021 print_ldd("Read previous values", &(mop.mo_ldd));
1024 ret = parse_opts(argc, argv, &mop, mountopts);
/* At least one of the three target types has to be selected. */
1028 if (!(IS_MDT(&mop.mo_ldd) || IS_OST(&mop.mo_ldd) ||
1029 IS_MGS(&mop.mo_ldd))) {
1031 fprintf(stderr, "must set target type :{mdt,ost,mgs}\n");
/* An MDT given no MGS NID becomes the MGS itself. */
1037 if (IS_MDT(&mop.mo_ldd) && !IS_MGS(&mop.mo_ldd) &&
1038 mop.mo_ldd.ldd_mgsnid_count == 0) {
1039 vprint("No management node specified, adding MGS to this "
1041 mop.mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGS;
/* Any other target that is not the MGS must name one. */
1044 if (!IS_MGS(&mop.mo_ldd) && (mop.mo_ldd.ldd_mgsnid_count == 0)) {
1046 fprintf(stderr, "Must specify either --mgs or --mgsnid\n");
1051 /* These are the permanent mount options (always included) */
1052 switch (mop.mo_ldd.ldd_mount_type) {
1054 case LDD_MT_LDISKFS: {
1055 sprintf(default_mountopts, "errors=remount-ro");
1056 if (IS_MDT(&mop.mo_ldd) || IS_MGS(&mop.mo_ldd))
1057 strcat(default_mountopts,
1058 ",iopen_nopriv,user_xattr");
1059 if ((get_os_version() == 24) && IS_OST(&mop.mo_ldd))
1060 strcat(default_mountopts, ",asyncdel");
1061 /* Files created while extents are enabled cannot be read if
1062 mounted with a kernel that doesn't include the CFS patches.*/
1063 if (IS_OST(&mop.mo_ldd) &&
1064 mop.mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) {
1065 strcat(default_mountopts, ",extents,mballoc");
1071 mop.mo_flags |= MO_IS_LOOP;
1072 sprintf(default_mountopts, "type=ext3,dev=%s",
1078 fprintf(stderr, "unknown fs type %d '%s'\n",
1079 mop.mo_ldd.ldd_mount_type,
1080 MT_STR(&mop.mo_ldd));
1086 #ifndef TUNEFS /* mkfs.lustre */
1088 /* Tack on user supplied opts */
1089 sprintf(mop.mo_ldd.ldd_mount_opts, "%s,%s",
1090 default_mountopts, mountopts);
1092 strcpy(mop.mo_ldd.ldd_mount_opts, default_mountopts);
1093 #else /* tunefs.lustre - if mountopts are specified, they override
1094 whatever we had before, so no defaults. */
1096 strcpy(mop.mo_ldd.ldd_mount_opts, mountopts);
1097 else if (*mop.mo_ldd.ldd_mount_opts == 0)
1098 /* no mount opts were set ever, use the defaults. */
1099 strcpy(mop.mo_ldd.ldd_mount_opts, default_mountopts);
1100 /* otherwise, use the old. */
/* Derive the server name from the settings gathered so far. */
1103 ldd_make_sv_name(&(mop.mo_ldd));
1106 print_ldd("Permanent disk data", &(mop.mo_ldd));
1109 printf("exiting before disk write.\n");
1113 #ifndef TUNEFS /* mkfs.lustre */
1114 /* Create the loopback file of the correct size */
1115 if (mop.mo_flags & MO_IS_LOOP) {
1116 ret = access(mop.mo_device, F_OK);
1117 /* Don't destroy the loopback file if no FORCEFORMAT */
1118 if (ret || (mop.mo_flags & MO_FORCEFORMAT))
1119 ret = loop_format(&mop);
1121 ret = loop_setup(&mop);
1124 fprintf(stderr, "Loop device setup failed: %s\n",
1130 /* Check whether the disk has already been formatted by mkfs.lustre */
/* The existing-data check is skipped when a reformat was requested. */
1131 if (!(mop.mo_flags & MO_FORCEFORMAT)) {
1132 ret = is_lustre_target(&mop);
1135 fprintf(stderr, "Device %s was previously formatted "
1136 "for lustre. Use --reformat to reformat it, "
1137 "or tunefs.lustre to modify.\n",
1143 /* Format the backing filesystem */
1144 ret = make_lustre_backfs(&mop);
1147 fprintf(stderr, "mkfs failed %d\n", ret);
1152 ret = write_local_files(&mop);
1155 fprintf(stderr, "failed to write local files\n");