1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
5 * Author: Lin Song Tao <lincent@clusterfs.com>
6 * Author: Nathan Rutman <nathan@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 #include <sys/types.h>
38 #include <linux/types.h>
39 #include <linux/lustre_disk.h>
40 #include <portals/ptlctl.h>
/* File-scope state used throughout this tool:
 *   cmdlist          - command table, present so that obd.o links
 *   MAX_LOOP_DEVICES - upper bound on the loop device indices that get probed
 *   progname         - program name used as a prefix in diagnostics
 *   verbose          - verbosity level; gates vprint and the output echo
 *   cmd_out          - captured command output, 32 slots of 128 bytes each
 *   ret_file         - log file that command output is redirected into
 *   loop_base        - loop device name prefix, filled in at run time
 */
43 /* So obd.o will link */
45 command_t cmdlist[] = {
50 #define MAX_LOOP_DEVICES 256
52 static char *progname;
53 static int verbose = 1;
55 /* for running system() */
57 static char cmd_out[32][128];
58 static char *ret_file = "/tmp/mkfs.log";
61 static char loop_base[20];
/* Help text.  The usage banner for progname is written to the stream out;
 * the string literals that follow describe the device argument and each
 * supported option (server type, management node, filesystem name, backing
 * filesystem, sizes and striping, mount and format options, timeouts, and
 * reformat). */
65 fprintf(out, "usage: %s [options] <device>\n", progname);
68 "\t<device>:block device or file (e.g /dev/sda or /tmp/ost1)\n"
70 "\t\t--ost: object storage, mutually exclusive with mdt\n"
71 "\t\t--mdt: metadata storage, mutually exclusive with ost\n"
72 "\t\t--mgmt: configuration management service - one per site\n"
73 "\t\t--mgmtnode=<mgtnode>[,<failover-mgtnode>]:nid of a remote\n"
74 "\t\t\tmgmt node [and the failover mgmt node]\n"
75 "\t\t--fsname=<filesystem_name>\n"
76 "\t\t--configdev=<altdevice|file>: store configuration info\n"
77 "\t\t\tfor this device on an alternate device\n"
78 "\t\t--failover=<failover-address>\n"
79 "\t\t--backfstype=<fstype>: backing fs type (ext3, ldiskfs)\n"
80 "\t\t--device_size=#N(KB):device size \n"
81 "\t\t--stripe_count=#N:number of stripe\n"
82 "\t\t--stripe_size=#N(KB):stripe size\n"
83 "\t\t--index=#N:target index\n"
84 "\t\t--mountfsoptions=<opts>: permanent mount options\n"
85 "\t\t--mkfsoptions=<opts>: format options\n"
86 "\t\t--timeout=<secs>: system timeout period\n"
87 "\t\t--startupwait=<secs>: time to wait for other servers to join\n"
88 "\t\t--reformat: overwrite an existing disk\n"
/* Verbose printf: the call only happens when verbose is non-zero.
 * The expansion is a bare if statement, so an else written directly after
 * a vprint(...) call would bind to this hidden if. */
93 #define vprint if (verbose) printf
/* Begin a fatal-error report: writes a newline, the program name and the
 * FATAL: tag to stderr, leaving the line open for the message that the
 * caller prints next. */
95 static void fatal(void)
98 fprintf(stderr, "\n%s FATAL: ", progname);
/* Extract the major number from a device number: bits 8..19 of __dev,
 * OR-ed with the bits of (__dev >> 32) that lie above the low 12. */
102 dev_major (unsigned long long int __dev)
104 return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff);
/* Extract the minor number from a device number: the low 8 bits of __dev,
 * OR-ed with the bits of (__dev >> 12) that lie above the low 8. */
108 dev_minor (unsigned long long int __dev)
110 return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff);
/* Kernel version probe.  Reads the first four bytes of
 * /proc/sys/kernel/osrelease and compares them with the 2.4. prefix; a
 * warning is printed when the file cannot be opened.
 * NOTE(review): the static version variable presumably caches the result
 * between calls - confirm.
 * NOTE(review): the return value of read() is not checked, so a short or
 * failed read leaves release partly or wholly empty. */
115 static int version = 0;
119 char release[4] = "";
121 fd = open("/proc/sys/kernel/osrelease", O_RDONLY);
123 fprintf(stderr, "Warning: Can't resolve kernel version,"
126 read(fd, release, 4);
129 if (strncmp(release, "2.4.", 4) == 0)
/* Run a shell command and capture what it prints.
 *
 * cmd is modified in place: the log file name (ret_file) and a stderr
 * redirection are appended with strcat, so the caller must pass a writable
 * buffer with room for that extra text.  After the command has run, the
 * log file is read back line by line into cmd_out (cleared first); each
 * line is echoed when verbose is above 2, and a warning is printed when
 * some of the output cannot be kept.
 * NOTE(review): the step that actually executes the command is presumably
 * system(), per the comment on the file-scope buffers - confirm. */
137 //Ugly implement. FIXME
138 int run_command(char *cmd)
143 vprint("cmd: %s\n", cmd);
146 strcat(cmd, ret_file);
147 strcat(cmd, " 2>&1");
151 rfile = fopen(ret_file, "r");
153 fprintf(stderr,"Could not open %s \n",ret_file);
157 memset(cmd_out, 0, sizeof(cmd_out));
158 while (fgets(cmd_out[i], 128, rfile) != NULL) {
159 if (verbose > 2) printf(" _ %s", cmd_out[i]);
162 fprintf(stderr,"WARNING losing some output from %s",
172 static void run_command_out()
175 for (i = 0; i < 32; i++) {
176 if (strlen(cmd_out[i]) == 0)
178 fprintf(stderr, cmd_out[i]);
/* Choose the loop device name prefix and store it in loop_base:
 * /dev/loop when /dev/loop0 is accessible, /dev/loop/ when /dev/loop/0 is
 * accessible; an error is printed when neither can be accessed.
 * The explicit \0 at the end of each literal is redundant, since strcpy
 * stops at the first terminator. */
182 /* Figure out the loop device names */
185 if (!access("/dev/loop0", F_OK | R_OK))
186 strcpy(loop_base, "/dev/loop\0");
187 else if (!access("/dev/loop/0", F_OK | R_OK))
188 strcpy(loop_base, "/dev/loop/\0");
190 fprintf(stderr, "can't access loop devices\n");
/* Attach the backing file mop->mo_device to a loop device.
 *
 * Candidate names are built as loop_base followed by an index from 0 up to
 * MAX_LOOP_DEVICES - 1.  Each candidate is checked with access(), probed
 * with `losetup <dev>`, and then bound with `losetup <dev> <file>`; the
 * chosen device name is copied into mop->mo_loopdev.  When the scan runs
 * off the end, an out-of-devices message is printed.
 * NOTE(review): l_device and cmd are filled with sprintf - confirm their
 * declared sizes can hold a long mo_device path plus the text that
 * run_command() appends. */
196 /* Setup a file in the first unused loop_device */
197 int loop_setup(struct mkfs_opts *mop)
202 for (i = 0; i < MAX_LOOP_DEVICES; i++) {
203 sprintf(l_device, "%s%d", loop_base, i);
204 if (access(l_device, F_OK | R_OK))
207 sprintf(cmd, "losetup %s", l_device);
208 ret = run_command(cmd);
209 /* losetup gets 1 (256?) for good non-set-up device */
211 sprintf(cmd, "losetup %s %s", l_device, mop->mo_device);
212 ret = run_command(cmd);
214 fprintf(stderr, "error %d on losetup: %s\n",
218 strcpy(mop->mo_loopdev, l_device);
223 fprintf(stderr,"out of loop devices!\n");
/* Detach the loop device recorded in mop->mo_loopdev with `losetup -d`.
 * Only acts when the MO_IS_LOOP flag is set in mop->mo_flags. */
227 int loop_cleanup(struct mkfs_opts *mop)
230 if (mop->mo_flags & MO_IS_LOOP) {
231 sprintf(cmd, "losetup -d %s", mop->mo_loopdev);
232 ret = run_command(cmd);
/* Report whether devname refers to a block device.
 * The path is first tested for existence with access(F_OK), then examined
 * with stat(); a message is printed when stat fails.  The final result is
 * S_ISBLK() applied to the st_mode that stat returned. */
237 /* Determine if a device is a block device (as opposed to a file) */
238 int is_block(char* devname)
243 ret = access(devname, F_OK);
246 ret = stat(devname, &st);
248 fprintf(stderr, "cannot stat %s\n",devname);
251 return S_ISBLK(st.st_mode);
/* Look up the size of a device in /proc/partitions.
 *
 * The device is examined with stat() and its st_rdev split into major and
 * minor numbers.  The output of `cat /proc/partitions` is captured through
 * run_command(), and each captured line is tokenised on spaces: the first
 * two fields are compared with the major and minor numbers, and on a match
 * the third field is taken as the size.  Falls through to return 0 when no
 * line matches.
 *
 * Only the lines that fit in cmd_out (32 slots) are searched.
 * strtok() modifies cmd_out in place.
 * NOTE(review): mi is NULL for a line holding a single token; atol(mi) is
 * reached only when the major number already matched - confirm this cannot
 * happen for the header or blank lines. */
254 /* Get the devsize from /proc/partitions with the major and minor number */
255 int device_size_proc(char* device)
257 int major,minor,i,ret;
261 ret = stat(device,&st);
263 fprintf(stderr,"can not stat %s\n",device);
266 major = dev_major(st.st_rdev);
267 minor = dev_minor(st.st_rdev);
269 sprintf(cmd, "cat /proc/partitions");
270 ret = run_command(cmd);
271 for (i = 0; i < 32; i++) {
272 if (strlen(cmd_out[i]) == 0)
274 ma = strtok(cmd_out[i], " ");
275 mi = strtok(NULL, " ");
276 if ((major == atol(ma)) && (minor == atol(mi))) {
277 sz = strtok(NULL," ");
282 return 0; //FIXME : no entries in /proc/partitions
/* Parse str with libcfs_str2nid() and store the result as the primary nid
 * of *nids.  A secondary nid after a comma is not handled yet (see the
 * FIXME below). */
285 void set_nid_pair(struct host_desc *nids, char *str)
287 nids->primary = libcfs_str2nid(str);
288 // FIXME secondary too (,altnid)
/* Write the on-disk server files onto a freshly formatted target.
 *
 * Visible steps:
 *   1. create a temporary mount point with mkdtemp();
 *   2. mount mop->mo_device there with the backing fs type from MT_STR(),
 *      adding the loop option (in front of the permanent mount options,
 *      if any) when MO_IS_LOOP is set;
 *   3. create the MOUNT_CONFIGS_DIR directory;
 *   4. write the raw mop->mo_ldd structure to MOUNT_DATA_FILE;
 *   5. write an initial lr_server_data record to LAST_RCVD (uuid taken
 *      from ldd_svname, index from mo_index, little-endian size fields,
 *      plus MDS_ROCOMPAT_LOVOBJID for an MDT);
 *   6. build the umount command for the temporary mount point.
 *
 * NOTE(review): both fwrite() results are discarded, so a short write goes
 * unnoticed here.
 * NOTE(review): strncpy into lsd_uuid relies on the preceding memset for
 * termination, which only holds when ldd_svname is shorter than lsd_uuid. */
291 /* Write the server config files */
292 int write_local_files(struct mkfs_opts *mop)
294 struct lr_server_data lsd;
295 char mntpt[] = "/tmp/mntXXXXXX";
297 char local_mount_opts[sizeof(mop->mo_ldd.ldd_mount_opts)] = "";
301 /* Mount this device temporarily in order to write these files */
302 vprint("mounting backing device\n");
303 if (!mkdtemp(mntpt)) {
304 fprintf(stderr, "Can't create temp mount point %s: %s\n",
305 mntpt, strerror(errno));
309 if (mop->mo_flags & MO_IS_LOOP) {
310 /* ext3 can't understand iopen_nopriv, others */
311 // FIXME ext3 on 2.6 can't. So use ldiskfs on 2.6
312 if (strlen(mop->mo_ldd.ldd_mount_opts))
313 snprintf(local_mount_opts, sizeof(local_mount_opts),
314 "loop,%s", mop->mo_ldd.ldd_mount_opts);
316 sprintf(local_mount_opts, "loop");
318 sprintf(cmd, "mount -t %s %s%s %s %s",
319 MT_STR(&mop->mo_ldd), strlen(local_mount_opts) ? "-o ": "",
320 local_mount_opts, mop->mo_device, mntpt);
321 ret = run_command(cmd);
323 fprintf(stderr, "Unable to mount %s\n", mop->mo_device);
328 /* Set up initial directories */
329 sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR);
330 ret = mkdir(filepnm, 0777);
332 fprintf(stderr, "Can't make configs dir %s (%d)\n",
337 /* Save the persistent mount data into a file. Lustre must pre-read
338 this file to get the real mount options. */
339 vprint("Writing %s\n", MOUNT_DATA_FILE);
340 sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
341 filep = fopen(filepnm, "w");
343 fprintf(stderr, "Unable to create %s file\n", filepnm);
346 fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
349 /* Create the initial last_rcvd file */
350 vprint("Writing %s\n", LAST_RCVD);
351 sprintf(filepnm, "%s/%s", mntpt, LAST_RCVD);
352 filep = fopen(filepnm, "w");
355 fprintf(stderr,"Unable to create %s file\n", filepnm);
358 memset(&lsd, 0, sizeof(lsd));
359 strncpy(lsd.lsd_uuid, mop->mo_ldd.ldd_svname, sizeof(lsd.lsd_uuid));
360 lsd.lsd_index = mop->mo_index;
361 lsd.lsd_feature_compat |= cpu_to_le32(LR_COMPAT_COMMON_LR);
362 lsd.lsd_server_size = cpu_to_le32(LR_SERVER_SIZE);
363 lsd.lsd_client_start = cpu_to_le32(LR_CLIENT_START);
364 lsd.lsd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
365 if (IS_MDT(&mop->mo_ldd))
366 lsd.lsd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
368 fwrite(&lsd, sizeof(lsd), 1, filep);
372 vprint("unmounting backing device\n");
373 sprintf(cmd, "umount %s", mntpt);
/* Create the backing file for a loop-mounted target.
 * Runs dd with count=0 and seek=<mo_device_sz> in 1k blocks, which extends
 * mop->mo_device to the requested size without writing any data blocks.
 * Prints an error with the command status when dd fails. */
380 int loop_format(struct mkfs_opts *mop)
386 sprintf(cmd, "dd if=/dev/zero bs=1k count=0 seek=%ld of=%s",
387 mop->mo_device_sz, mop->mo_device);
388 ret = run_command(cmd);
390 fprintf(stderr, "Unable to create backing store: %d\n", ret);
/* Format the backing filesystem for this target.
 *
 * Visible flow:
 *   - when a device size was given, reject small sizes and derive the
 *     4k block count from it (mo_device_sz is in KB);
 *   - for ext3/ldiskfs: find the device size (sfdisk -s, with
 *     device_size_proc() as the alternative), then append defaults to
 *     mo_mkfsopts for every option the user did not supply:
 *       -J journal size, -i bytes per inode (4096 on an MDT, 16384 on a
 *       large OST), -I inode size (grown with the MDT stripe count);
 *     the command starts as mkfs.ext2 -j -b 4096 -L <svname>;
 *   - for reiserfs: optional journal size, command mkreiserfs -ff;
 *   - any other type is reported as unsupported;
 *   - for a loop target, create the backing file (loop_format) and attach
 *     it (loop_setup), then format the loop device instead;
 *   - run the assembled mkfs command, then enable dir_index with tune2fs
 *     on ext3/ldiskfs.
 *
 * The option detection uses strstr on mo_mkfsopts, so it matches the
 * option letters anywhere in the string.
 * NOTE(review): the minimum-size check compares against 8096 while the
 * message says 8MB (8192 KB) - confirm which value is intended.
 * NOTE(review): defaults are appended to mo_mkfsopts and mkfs_cmd with
 * strcat; no length check is visible here. */
395 /* Build fs according to type */
396 int make_lustre_backfs(struct mkfs_opts *mop)
404 if (mop->mo_device_sz != 0) {
405 if (mop->mo_device_sz < 8096){
406 fprintf(stderr, "size of filesystem must be larger "
407 "than 8MB, but is set to %ldKB\n",
411 block_count = mop->mo_device_sz / 4; /* block size is 4096 */
414 if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
415 (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
416 long device_sz = mop->mo_device_sz;
418 /* we really need the size */
420 sprintf(cmd, "sfdisk -s %s", mop->mo_device);
421 ret = run_command(cmd);
423 device_sz = atol(cmd_out[0]);
425 device_sz = device_size_proc(mop->mo_device);
428 if (strstr(mop->mo_mkfsopts, "-J") == NULL) {
429 /* Choose our own default journal size */
431 if (device_sz > 1024 * 1024)
432 journal_sz = (device_sz / 102400) * 4;
433 if (journal_sz > 400)
436 sprintf(buf, " -J size=%ld", journal_sz);
437 strcat(mop->mo_mkfsopts, buf);
441 /* Default is block size */
442 if (strstr(mop->mo_mkfsopts, "-i") == NULL) {
443 long bytes_per_inode = 0;
445 if (IS_MDT(&mop->mo_ldd))
446 bytes_per_inode = 4096;
448 /* Allocate fewer inodes on large OST devices. Most
449 filesystems can be much more aggressive than even
451 if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000)))
452 bytes_per_inode = 16384;
454 if (bytes_per_inode > 0) {
455 sprintf(buf, " -i %ld", bytes_per_inode);
456 strcat(mop->mo_mkfsopts, buf);
460 /* This is an undocumented mke2fs option. Default is 128. */
461 if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
463 if (IS_MDT(&mop->mo_ldd)) {
464 if (mop->mo_stripe_count > 77)
465 inode_size = 512; /* bz 7241 */
466 else if (mop->mo_stripe_count > 34)
468 else if (mop->mo_stripe_count > 13)
474 if (inode_size > 0) {
475 sprintf(buf, " -I %ld", inode_size);
476 strcat(mop->mo_mkfsopts, buf);
481 sprintf(mkfs_cmd, "mkfs.ext2 -j -b 4096 -L %s ",
482 mop->mo_ldd.ldd_svname);
484 } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) {
486 if (journal_sz > 0) { /* FIXME */
487 sprintf(buf, " --journal_size %ld", journal_sz);
488 strcat(mop->mo_mkfsopts, buf);
490 sprintf(mkfs_cmd, "mkreiserfs -ff ");
493 fprintf(stderr,"unsupported fs type: %d (%s)\n",
494 mop->mo_ldd.ldd_mount_type,
495 MT_STR(&mop->mo_ldd));
500 dev = mop->mo_device;
501 if (mop->mo_flags & MO_IS_LOOP) {
502 ret = loop_format(mop);
504 ret = loop_setup(mop);
507 fprintf(stderr, "Loop device setup failed %d\n", ret);
510 dev = mop->mo_loopdev;
513 vprint("formatting backing filesystem %s on %s\n",
514 MT_STR(&mop->mo_ldd), dev);
515 vprint("\tservice name %s\n", mop->mo_ldd.ldd_svname);
516 vprint("\t4k blocks %d\n", block_count);
517 vprint("\toptions %s\n", mop->mo_mkfsopts);
519 /* mkfs_cmd's trailing space is important! */
520 strcat(mkfs_cmd, mop->mo_mkfsopts);
521 strcat(mkfs_cmd, " ");
522 strcat(mkfs_cmd, dev);
523 if (block_count != 0) {
524 sprintf(buf, " %d", block_count);
525 strcat(mkfs_cmd, buf);
528 vprint("mkfs_cmd = %s\n", mkfs_cmd);
529 ret = run_command(mkfs_cmd);
532 fprintf(stderr, "Unable to build fs: %s \n", dev);
537 /* Enable hashed b-tree directory lookup in large dirs
539 if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
540 (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
541 sprintf(cmd, "tune2fs -O dir_index %s", dev);
542 ret = run_command(cmd);
545 fprintf(stderr,"Unable to enable htree: %s\n",
/* Load one kernel module by building a /sbin/modprobe command line for
 * module_name.  On failure the status is reported together with a hint to
 * check /etc/modules.conf.
 * NOTE(review): module_name is pasted into the command with sprintf;
 * confirm buf is large enough for every name passed in. */
556 static int load_module(char *module_name)
561 vprint("loading %s\n", module_name);
562 sprintf(buf, "/sbin/modprobe %s", module_name);
565 fprintf(stderr, "%s: failed to modprobe %s (%d)\n",
566 progname, module_name, rc);
567 fprintf(stderr, "Check /etc/modules.conf\n");
/* Load the kernel modules needed for writing logs: _lustre first, then
 * mds (the MDT code is currently used to write llogs, per the FIXME). */
572 static int load_modules(struct mkfs_opts *mop)
576 //client: rc = load_module("lustre");
577 vprint("Loading modules...");
579 /* portals, ksocknal, fsfilt, etc. in modules.conf */
580 rc = load_module("_lustre");
583 /* FIXME currently use the MDT to write llogs, should be a MGS */
584 rc = load_module("mds");
/* Prepare the portals and obd control interfaces.
 * Checks for the PORTALS_DEV_PATH and OBD_DEV_PATH device nodes with
 * access() and has mknod commands available to create them as character
 * devices 10,240 and 10,241, then calls ptl_initialize() and
 * obd_initialize(). */
589 static int jt_setup()
593 ret = access(PORTALS_DEV_PATH, F_OK);
595 system("mknod "PORTALS_DEV_PATH" c 10 240");
596 ret = access(OBD_DEV_PATH, F_OK);
598 system("mknod "OBD_DEV_PATH" c 10 241");
600 ptl_initialize(0, NULL);
601 obd_initialize(0, NULL);
/* Collect the local network ids.
 * For each index below maxnids, issues the IOC_PORTAL_GET_NI ioctl with
 * ioc_count set to that index and stores the returned ioc_nid in
 * nidarray[index] (also printing it in verbose mode).  An ioctl error is
 * reported with errno, and a placeholder line is printed when there are
 * no local networks. */
605 /* see jt_ptl_network */
606 int jt_getnids(ptl_nid_t *nidarray, int maxnids)
608 struct portal_ioctl_data data;
612 for (count = 0; count < maxnids; count++) {
613 PORTAL_IOC_INIT (data);
614 data.ioc_count = count;
615 rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NI, &data);
618 vprint("%s\n", libcfs_nid2str(data.ioc_nid));
619 nidarray[count] = data.ioc_nid;
626 fprintf(stderr,"IOC_PORTAL_GET_NI error %d: %s\n",
627 errno, strerror(errno));
632 printf("<no local networks>\n");
/* Trace a command: prints cmd_name left-justified in a 20-character
 * column (longer names are cut to 20), followed by the argv entries
 * separated by spaces. */
636 static void jt_print(char *cmd_name, int argc, char **argv)
639 printf("%-20.20s: ", cmd_name);
641 printf("%s ", argv[i]);
/* Call a command handler with an argv built from the variadic arguments.
 *
 * Every variadic argument is read as a char * and copied into a freshly
 * allocated jt_cmds entry; the vector is traced with jt_print() and handed
 * to cmd together with its length.  A failing command is reported with
 * strerror() of the absolute return value.
 *
 * Callers in this file end the argument list with 0, presumably the
 * terminator this function looks for - confirm.
 * NOTE(review): the malloc() result is used by strcpy() without a check. */
647 static int _do_jt(int (*cmd)(int argc, char **argv), char *cmd_name, ...)
655 va_start(ap, cmd_name);
657 s = va_arg(ap, char *);
660 jt_cmds[i] = malloc(strlen(s) + 1);
661 strcpy(jt_cmds[i], s);
667 jt_print(cmd_name, i, jt_cmds);
669 ret = (*cmd)(i, jt_cmds);
671 fprintf(stderr, "%s: jt_cmd %s: (%d) %s\n",
672 progname, jt_cmds[0], ret, strerror(abs(ret)));
/* Convenience wrappers around _do_jt(); the stringified handler name is
 * passed as cmd_name.
 *   do_jt       - stores the result in a local named ret and jumps to the
 *                 label out_jt when it is non-zero, so it can only be used
 *                 in a function that declares both.  The expansion is a
 *                 bare if statement.
 *   do_jt_noret - plain call; the result is the value of the expression. */
680 #define do_jt(cmd, a...) if ((ret = _do_jt(cmd, #cmd, ## a))) goto out_jt
681 #define do_jt_noret(cmd, a...) _do_jt(cmd, #cmd, ## a)
/* Write the Lustre configuration logs for this target through a temporary
 * obd named by confname (llog_writer).
 *
 * Visible flow: jt_setup(); loop_setup() when MO_IS_LOOP is set; attach,
 * cfg_device and setup of the temporary mds-type obd on dev; then
 *   - for an OST: the <svname>-conf log and the OSS-conf log;
 *   - for an MDT: the <svname>-conf log, the MDT startup log (named after
 *     svname, using a LOV called lov-<svname>), and client-<net> logs for
 *     the local nids returned by jt_getnids() (presumably one per nid).
 * The tail of the function ends any open record, cleans up and detaches
 * the temporary obd, and calls obd_finalize().
 *
 * do_jt() jumps to out_jt on the first failing command; do_jt_noret()
 * only yields the result.
 *
 * NOTE(review): in the backup-nid add_uuid call, mop->mo_hostnid.backup is
 * passed as a variadic argument, but _do_jt() reads every argument as a
 * char * - a string (for example a uuid) looks intended; confirm.
 * NOTE(review): cliname and mdcname are sized from ldd_fsname yet receive
 * an extra client- prefix or -mdc suffix; snprintf truncates when the
 * result does not fit. */
683 int write_llog_files(struct mkfs_opts *mop)
685 char confname[] = "llog_writer";
692 vprint("Creating Lustre logs\n");
693 if ((ret = jt_setup()))
698 do_jt_noret(jt_dbg_modules, "modules", 0);
701 dev = mop->mo_device;
702 if (mop->mo_flags & MO_IS_LOOP) {
703 ret = loop_setup(mop);
706 dev = mop->mo_loopdev;
709 /* FIXME can't we just write these log files ourselves? Why do we
710 have to go through an obd at all? jt_ioc_dump()? */
711 /* FIXME use mgmt server obd to write logs. Can start it by mounting
713 /* Set up a temporary obd for writing logs.
714 mds and confobd can handle OBD_IOC_DORECORD */
715 ret = do_jt_noret(jt_lcfg_attach, "attach", "mds"/*confobd*/, confname,
716 mop->mo_ldd.ldd_svname/*uuid*/, 0);
719 ret = do_jt_noret(jt_lcfg_device, "cfg_device", confname, 0);
722 do_jt(jt_lcfg_setup, "setup", dev,
723 MT_STR(&mop->mo_ldd), /*mop->mo_ldd.ldd_mount_opts,*/ 0);
724 /* Record on this device. */
725 do_jt(jt_obd_device, "device", confname, 0);
727 snprintf(name, sizeof(name), "%s-conf", mop->mo_ldd.ldd_svname);
729 if (IS_OST(&mop->mo_ldd)) {
730 do_jt(jt_cfg_clear_log, "clear_log", name, 0);
731 do_jt(jt_cfg_record, "record", name, 0);
732 do_jt(jt_lcfg_attach, "attach", "obdfilter",
733 mop->mo_ldd.ldd_svname, mop->mo_ldd.ldd_svname/*uuid*/, 0);
734 do_jt(jt_lcfg_device, "cfg_device", mop->mo_ldd.ldd_svname, 0);
735 /* FIXME setup needs to change - no disk info */
736 do_jt(jt_lcfg_setup, "setup", mop->mo_device,
737 MT_STR(&mop->mo_ldd),
738 "f", /* f=recovery enabled, n=disabled */
739 mop->mo_ldd.ldd_mount_opts, 0);
740 do_jt(jt_cfg_endrecord, "endrecord", 0);
741 do_jt(jt_cfg_dump_log, "dump_log", name, 0);
743 do_jt(jt_cfg_clear_log, "clear_log", "OSS-conf", 0);
744 do_jt(jt_cfg_record, "record", "OSS-conf", 0);
745 do_jt(jt_lcfg_attach, "attach", "ost", "OSS", "OSS_UUID", 0);
746 do_jt(jt_lcfg_device, "cfg_device", "OSS", 0);
747 do_jt(jt_lcfg_setup, "setup", 0);
749 do_jt(jt_lcfg_set_timeout, "set_timeout",
751 do_jt(jt_cfg_endrecord, "endrecord", 0);
754 if (IS_MDT(&mop->mo_ldd)) {
755 ptl_nid_t nidarray[128];
756 char scnt[20], ssz[20], soff[20], spat[20];
757 char cliname[sizeof(mop->mo_ldd.ldd_fsname)];
758 char mdcname[sizeof(mop->mo_ldd.ldd_fsname)];
762 /* Write mds-conf log */
763 do_jt(jt_cfg_clear_log, "clear_log", name, 0);
764 do_jt(jt_cfg_record, "record", name, 0);
765 do_jt(jt_lcfg_attach, "attach", "mdt", "MDT", "MDT_UUID", 0);
766 do_jt(jt_lcfg_device, "cfg_device", "MDT", 0);
767 do_jt(jt_lcfg_setup, "setup", 0);
768 do_jt(jt_lcfg_attach, "attach", "mds", mop->mo_ldd.ldd_svname,
769 mop->mo_ldd.ldd_svname/*uuid*/, 0);
770 do_jt(jt_lcfg_device, "cfg_device", mop->mo_ldd.ldd_svname, 0);
771 do_jt(jt_lcfg_setup, "setup", mop->mo_device,
772 MT_STR(&mop->mo_ldd), mop->mo_ldd.ldd_svname,
773 mop->mo_ldd.ldd_mount_opts, 0);
775 do_jt(jt_lcfg_set_timeout, "set_timeout",
777 do_jt(jt_cfg_endrecord, "endrecord", 0);
779 /* Write mds startup log */
780 do_jt(jt_cfg_clear_log, "clear_log", mop->mo_ldd.ldd_svname, 0);
781 do_jt(jt_cfg_record, "record", mop->mo_ldd.ldd_svname, 0);
782 /*attach lov lov_conf_mdsA f0591_lov_conf_mdsA_224a85b5fc
783 lov_setup lovA_UUID 0 1048576 0 0 ost1_UUID
784 mount_option mdsA lov_conf_mdsA
786 snprintf(name, sizeof(name), "lov-%s", mop->mo_ldd.ldd_svname);
787 do_jt(jt_lcfg_attach, "attach", "lov", name,
789 snprintf(scnt, sizeof(scnt), "%d", mop->mo_stripe_count);
790 snprintf(ssz, sizeof(ssz), "%d", mop->mo_stripe_sz);
791 snprintf(soff, sizeof(soff), "%d", 0 /*FIXME?*/);
792 snprintf(spat, sizeof(spat), "%d", mop->mo_stripe_pattern);
793 do_jt(jt_lcfg_lov_setup, "lov_setup", name/*uuid*/,
794 scnt, ssz, soff, spat, 0);
795 /* Then for every failover ost pair we would add to mdt and client:
796 #03 L add_uuid nid=c0a80203 nal_type=0 0:(null) 1:NID_uml3_UUID
797 #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:e61f5_lov1_84b41a5f41
798 #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:NID_uml3_UUID
799 #06 L add_uuid nid=c0a80204 nal_type=0 0:(null) 1:NID_uml4_UUID
800 #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:NID_uml4_UUID
801 #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2: 3:
803 /* This was an old hack to pass the lov name to the MDS:
804 mds_postsetup calls class_get_profile
805 to lookup the lov name: (profile=mds,osc=lov,mdc=0);
806 This command was originally intended for clients:
807 class_add_profile(profile,osc,mdc).
808 FIXME if we always make lovname=f(mdsname), we probably
810 do_jt(jt_lcfg_mount_option, "mount_option",
811 mop->mo_ldd.ldd_svname/*mds*/, name/*lov*/, 0);
813 do_jt(jt_lcfg_set_timeout, "set_timeout",
815 do_jt(jt_cfg_endrecord, "endrecord", 0);
817 /* Write client startup logs */
818 numnids = jt_getnids(nidarray,
819 sizeof(nidarray) / sizeof(nidarray[0]));
821 //Let the MGS create the client logs after the MDT has registered
823 fprintf(stderr, "%s: Can't figure out local nids, "
824 "skipping client log creation\n", progname);
828 snprintf(mdcname, sizeof(mdcname), "%s-mdc",
829 mop->mo_ldd.ldd_fsname);
832 nid = nidarray[numnids];
833 snprintf(cliname, sizeof(cliname), "client-%s",
834 libcfs_net2str(PTL_NIDNET(nid)));
835 vprint("log for %s\n", cliname);
836 do_jt(jt_cfg_clear_log, "clear_log", cliname, 0);
837 do_jt(jt_cfg_record, "record", cliname, 0);
838 do_jt(jt_lcfg_attach, "attach", "lov", name,
840 do_jt(jt_lcfg_lov_setup, "lov_setup", name/*uuid*/,
841 scnt, ssz, soff, spat, 0);
842 /* add osts here as in mdt above */
844 #09 L add_uuid nid=c0a80201 nal_type=0 0:(null) 1:NID_uml1_UUID
845 #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:efdac_MNT_client_fec96dc7f9
846 #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:NID_uml1_UUID
847 #12 L add_uuid nid=c0a80202 nal_type=0 0:(null) 1:NID_uml2_UUID
848 #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:NID_uml2_UUID
850 /* FIXME we need to put _all_possible_nids_ for
851 every server in the client startup llog. client
852 will then choose which nid to use. */
853 do_jt(jt_lcfg_add_uuid, "add_uuid",
854 mop->mo_ldd.ldd_svname /*FIXME mds name */,
855 libcfs_nid2str(mop->mo_hostnid.primary), 0);
856 do_jt(jt_lcfg_attach, "attach", "mdc", mdcname,
858 do_jt(jt_lcfg_device, "cfg_device", mdcname, 0);
859 /* mdc_setup client_uuid server_uuid */
860 do_jt(jt_lcfg_setup, "setup", cliname,
861 mop->mo_ldd.ldd_svname, 0);
862 if (mop->mo_hostnid.backup != PTL_NID_ANY) {
863 do_jt(jt_lcfg_add_uuid, "add_uuid",
864 libcfs_nid2str(mop->mo_hostnid.backup),
865 mop->mo_hostnid.backup, 0);
866 do_jt(jt_lcfg_add_conn, "add_conn",
867 libcfs_nid2str(mop->mo_hostnid.backup)/*uuid*/, 0);
869 do_jt(jt_lcfg_mount_option, "mount_option",
870 cliname, name/*osc(lov)*/, mdcname, 0);
872 do_jt(jt_lcfg_set_timeout, "set_timeout",
880 /* Assume we erred while writing a record */
881 do_jt_noret(jt_cfg_endrecord, "endrecord", 0);
882 /* Clean up the confobd when we're done writing logs */
883 do_jt_noret(jt_lcfg_device, "cfg_device", confname, 0);
884 do_jt_noret(jt_obd_cleanup, "cleanup", 0);
885 do_jt_noret(jt_obd_detach, "detach", 0);
887 obd_finalize(1, (char **)&name /*dummy*/);
/* Fill in mop->mo_ldd.ldd_svname.
 * For an MDT or OST the name is built from at most 8 characters of the
 * filesystem name, a dash, the tag MDT or OST, and a 4-digit hexadecimal
 * number (presumably the target index - confirm).  The alternative
 * assignment is the fixed name MGMT.  The chosen name is printed in
 * verbose mode. */
892 /* Make the mdt/ost server obd name based on the filesystem name */
893 static void make_sv_name(struct mkfs_opts *mop)
895 /* FIXME if we're not given an index, we have to change our name
896 later -- can't have two servers with the same name.
897 So rewrite ost log, last_rcvd, and disk label, or we need to talk
898 to MGMT now to get index # */
900 if (IS_MDT(&mop->mo_ldd) || IS_OST(&mop->mo_ldd)) {
901 sprintf(mop->mo_ldd.ldd_svname, "%.8s-%s%04x",
902 mop->mo_ldd.ldd_fsname,
903 IS_MDT(&mop->mo_ldd) ? "MDT" : "OST",
906 sprintf(mop->mo_ldd.ldd_svname, "MGMT");
908 vprint("Server name: %s\n", mop->mo_ldd.ldd_svname);
/* Initialise *mop with default settings: the LDD magic, the NEED_INDEX
 * flag, a backing fs type chosen from the kernel version (ext3 when
 * get_os_version() reports 24, with ldiskfs as the alternative), the
 * filesystem name lustre, and a stripe count of 1.  The local host name is
 * fetched as well, but the code that would turn it into the primary nid
 * is commented out. */
911 void set_defaults(struct mkfs_opts *mop)
914 mop->mo_ldd.ldd_magic = LDD_MAGIC;
915 mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX;
917 if (get_os_version() == 24)
918 mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3;
920 mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS;
922 strcpy(mop->mo_ldd.ldd_fsname, "lustre");
923 mop->mo_stripe_count = 1;
926 gethostname(hostname, sizeof(hostname));
927 //mop->mo_hostnid.primary = libcfs_str2nid(hostname);
/* Report on stderr that option character opt is only valid for the server
 * kinds named in type. */
930 static inline void badopt(char opt, char *type)
932 fprintf(stderr, "%s: '%c' only valid for %s\n",
933 progname, opt, type);
/* Entry point.
 *
 * Visible flow: clear a struct mkfs_opts, parse the command line with
 * getopt_long(), check that a device argument and a server type were
 * given, make an MDT without a management node double as the management
 * server, default the MDT stripe size to 1 MB, build the permanent mount
 * options for the backing fs type, mark the target as loop-backed when it
 * is SMFS or not a block device (which then requires a device size), and
 * finally run make_lustre_backfs(), write_local_files() and
 * write_llog_files(), reporting the first one that fails.
 *
 * NOTE(review): the long options stripe_index and index both map to the
 * short option i.
 * NOTE(review): the device argument is copied into mop.mo_device with
 * strcpy and user mount options are appended to ldd_mount_opts with
 * strcat; no length check is visible for either.
 * NOTE(review): numeric options are parsed with atol, which cannot report
 * malformed input. */
938 int main(int argc , char *const argv[])
940 struct mkfs_opts mop;
941 static struct option long_opt[] = {
942 {"backfstype", 1, 0, 'b'},
943 {"configdev", 1, 0, 'C'},
944 {"device_size", 1, 0, 'd'},
945 {"fsname",1, 0, 'n'},
946 {"failover", 1, 0, 'f'},
950 {"mgmtnode", 1, 0, 'm'},
951 {"mkfsoptions", 1, 0, 'k'},
952 {"mountfsoptions", 1, 0, 'o'},
954 {"reformat", 0, 0, 'r'},
955 {"startupwait", 1, 0, 'w'},
956 {"stripe_count", 1, 0, 'c'},
957 {"stripe_size", 1, 0, 's'},
958 {"stripe_index", 1, 0, 'i'},
959 {"index", 1, 0, 'i'},
960 {"timeout", 1, 0, 't'},
961 {"verbose", 0, 0, 'v'},
964 char *optstring = "b:C:d:n:f:hI:MGm:k:o:Orw:c:s:i:t:v";
966 char *mountopts = NULL;
973 memset(&mop, 0, sizeof(mop));
976 while ((opt = getopt_long(argc, argv, optstring, long_opt, NULL)) !=
981 while (i < LDD_MT_LAST) {
982 if (strcmp(optarg, mt_str(i)) == 0) {
983 mop.mo_ldd.ldd_mount_type = i;
994 if (IS_MDT(&mop.mo_ldd)) {
995 int stripe_count = atol(optarg);
996 mop.mo_stripe_count = stripe_count;
1002 mop.mo_device_sz = atol(optarg);
1005 strncpy(mop.mo_mkfsopts, optarg,
1006 sizeof(mop.mo_mkfsopts) - 1);
1009 /* we must pass this info on when we register with
1011 //mop.mo_hostnid.backup = libcfs_str2nid(optarg);
1014 mop.mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGMT;
1020 if (IS_MDT(&mop.mo_ldd) || IS_OST(&mop.mo_ldd)) {
1021 mop.mo_index = atol(optarg);
1022 mop.mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX;
1024 badopt(opt, "MDT,OST");
1028 if (IS_MGMT(&mop.mo_ldd))
1029 badopt(opt, "non-MGMT MDT,OST");
1030 set_nid_pair(&mop.mo_ldd.ldd_mgmtnid, optarg);
1033 mop.mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MDT;
1036 if (!(IS_MDT(&mop.mo_ldd) || IS_OST(&mop.mo_ldd)))
1037 badopt(opt, "MDT,OST");
1038 if (strlen(optarg) > 8) {
1039 fprintf(stderr, "%s: filesystem name must be "
1040 "<= 8 chars\n", progname);
1044 strncpy(mop.mo_ldd.ldd_fsname, optarg,
1045 sizeof(mop.mo_ldd.ldd_fsname) - 1);
1051 mop.mo_ldd.ldd_flags |= LDD_F_SV_TYPE_OST;
1054 mop.mo_flags |= MO_FORCEFORMAT;
1057 if (IS_MDT(&mop.mo_ldd))
1058 mop.mo_stripe_sz = atol(optarg) * 1024;
1063 mop.mo_timeout = atol(optarg);
1071 fprintf(stderr, "Unknown option '%c'\n", opt);
1077 if (optind >= argc) {
1079 fprintf(stderr, "Bad arguments\n");
1083 if (!(IS_MDT(&mop.mo_ldd) || IS_OST(&mop.mo_ldd) ||
1084 IS_MGMT(&mop.mo_ldd))) {
1086 fprintf(stderr, "must set server type :{mdt,ost,mgmt}\n");
1090 if (IS_MDT(&mop.mo_ldd) && !IS_MGMT(&mop.mo_ldd) &&
1091 mop.mo_ldd.ldd_mgmtnid.primary == PTL_NID_ANY) {
1092 vprint("No MGMT specified, adding to this MDT\n");
1093 mop.mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGMT;
1094 //FIXME mop.mo_ldd.ldd_mgmt.primary == libcfs_str2nid(localhost);
1097 if (mop.mo_ldd.ldd_mgmtnid.primary == PTL_NID_ANY) {
1099 fprintf(stderr, "Must specify either --mgmt or --mgmtnode\n");
1103 if (IS_MDT(&mop.mo_ldd) && (mop.mo_stripe_sz == 0))
1104 mop.mo_stripe_sz = 1024 * 1024;
1106 strcpy(mop.mo_device, argv[optind]);
1108 /* These are the permanent mount options. */
1109 if ((mop.mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
1110 (mop.mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
1111 sprintf(mop.mo_ldd.ldd_mount_opts, "errors=remount-ro");
1113 if (IS_MDT(&mop.mo_ldd))
1114 strcat(mop.mo_ldd.ldd_mount_opts, ",iopen_nopriv");
1115 if ((get_os_version() == 24) && IS_OST(&mop.mo_ldd))
1116 strcat(mop.mo_ldd.ldd_mount_opts, ",asyncdel");
1117 } else if (mop.mo_ldd.ldd_mount_type == LDD_MT_SMFS) {
1118 sprintf(mop.mo_ldd.ldd_mount_opts, "type=ext3,dev=%s",
1122 fprintf(stderr, "%s: unknown fs type %d '%s'\n",
1123 progname, mop.mo_ldd.ldd_mount_type,
1124 MT_STR(&mop.mo_ldd));
1128 strcat(mop.mo_ldd.ldd_mount_opts, ",");
1129 strcat(mop.mo_ldd.ldd_mount_opts, mountopts);
1132 if ((mop.mo_ldd.ldd_mount_type == LDD_MT_SMFS) ||
1133 !is_block(mop.mo_device)) {
1134 mop.mo_flags |= MO_IS_LOOP;
1135 if (mop.mo_device_sz == 0) {
1137 fprintf(stderr, "loop device requires a --device_size= "
1145 ret = make_lustre_backfs(&mop);
1148 fprintf(stderr, "mkfs failed %d\n", ret);
1152 ret = write_local_files(&mop);
1155 fprintf(stderr, "failed to write local files\n");
1159 ret = write_llog_files(&mop);
1162 fprintf(stderr, "failed to write setup logs\n");