Whamcloud - gitweb
Branch b1_4_newconfig2
authornathan <nathan>
Tue, 19 Jul 2005 00:14:56 +0000 (00:14 +0000)
committernathan <nathan>
Tue, 19 Jul 2005 00:14:56 +0000 (00:14 +0000)
b=6663
rework mkfs.lustre per DLD (in progress)

lustre/include/linux/lustre_disk.h
lustre/utils/mkfs_lustre.c

index e4d7d78..a94c63b 100644 (file)
@@ -51,7 +51,7 @@ struct lr_server_data {
         __u64 lsd_catalog_oid;     /* recovery catalog object id */
         __u32 lsd_catalog_ogen;    /* recovery catalog inode generation */
         __u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
-        __u32 lsd_index;           /* stripe index */
+        __u32 lsd_index;           /* target index (stripe index for ost)*/
         __u8  lsd_padding[LR_SERVER_SIZE - 144];
 };
 
@@ -90,9 +90,9 @@ struct lustre_mount_data {
 
 #define LDD_MAGIC 0xbabb0001
 
-#define LDD_SV_TYPE_MDT 0x0001
-#define LDD_SV_TYPE_OST 0x0002
-#define LDD_SV_TYPE_MGT 0x0004
+#define LDD_SV_TYPE_MDT  0x0001
+#define LDD_SV_TYPE_OST  0x0002
+#define LDD_SV_TYPE_MGMT 0x0004
 
 #define LDD_FS_TYPE_EXT3     1
 #define LDD_FS_TYPE_LDISKFS  2
@@ -109,9 +109,9 @@ struct lustre_disk_data {
         char      ldd_mount_opts[128]; /* target fs mount opts */
 };
         
-#define IS_MDT(data)  ((data)->ldd_flags & LDD_SV_TYPE_MDT)
-#define IS_OST(data)  ((data)->ldd_flags & LDD_SV_TYPE_OST)
-#define IS_MGT(data)  ((data)->ldd_flags & LDD_SV_TYPE_MGT)
+#define IS_MDT(data)   ((data)->ldd_flags & LDD_SV_TYPE_MDT)
+#define IS_OST(data)   ((data)->ldd_flags & LDD_SV_TYPE_OST)
+#define IS_MGMT(data)  ((data)->ldd_flags & LDD_SV_TYPE_MGMT)
 
 
 /****************** mkfs command *********************/
@@ -121,17 +121,18 @@ struct lustre_disk_data {
 /* used to describe the options to format the lustre disk, not persistent */
 struct mkfs_opts {
         struct lustre_disk_data mo_ldd; /* to be written in MOUNT_DATA_FILE */
+        long  mo_device_sz;
         int   mo_flags; 
-        int   mo_device_sz;
-        int   mo_journal_sz;
-        int   mo_inode_sz;
         char  mo_mount_type_string[20]; /* "ext3", "ldiskfs", ... */
         char  mo_device[128];           /* disk device name */
         char  mo_mkfsopts[128];         /* options to the backing-store mkfs */
+        ptl_nid_t mo_failover_nid;
         int   mo_stripe_sz;
         int   mo_stripe_count;
         int   mo_stripe_pattern;
-        /* stripe index will be in lr_server_data for ost's */
+        int   mo_index;                 /* stripe index for osts, pool index
+                                           for pooled mdts.  index will be put
+                                           in lr_server_data */
 };
 
 
index 2f8ffdd..a69a7ed 100644 (file)
@@ -51,7 +51,7 @@ command_t cmdlist[] = {
 /* FIXME */
 #define MAX_LOOP_DEVICES 256
 
-static char *progname = "lmkfs";
+static char *progname;
 static int verbose = 1;
 
 /* for running system() */
@@ -67,16 +67,16 @@ void usage(FILE *out)
         fprintf(out, "usage: %s <type> [options] <device>\n", progname);
 
         fprintf(out, 
-                "\t<type>:type of Lustre service (mds, ost or mgt)\n"
+                "\t<type>:type of Lustre service [mds|ost|mgmt]\n"
                 "\t<device>:block device or file (e.g /dev/hda or /tmp/ost1)\n"
                 "\t-h|--help: print this usage message\n"
-                "\tmkfs.lustre options:\n"
+                "\toptions:\n"
                 "\t\t--mgmtnode=<mgtnode>[,<failover-mgtnode>]:nid of mgmt node [and the failover mgmt node]\n"
                 "\t\t--failover=<failover-address>\n"
                 "\t\t--device_size=#N(KB):device size \n"
                 "\t\t--stripe_count=#N:number of stripe\n"
                 "\t\t--stripe_size=#N(KB):stripe size\n"
-                "\t\t--stripe_index=#N:stripe index for ost\n"
+                "\t\t--index=#N:target index\n"
                 "\t\t--smfsopts <smfs options>\n"
                 "\t\t--ext3opts <ext3 options>\n");
         exit(out != stdout);
@@ -154,6 +154,7 @@ int run_command(char *cmd, char out[32][128])
        return ret;
 }
 
+/* Figure out the loop device names */
 void init_loop_base()
 {
         if (!access("/dev/loop0",F_OK|R_OK))
@@ -167,33 +168,7 @@ void init_loop_base()
         return;
 }
 
-/* Find loop device assigned to the file */
-int find_assigned_loop(char* file, char* loop_device)
-{
-        int  i,ret;
-        char l_device[20];
-        char *loop_file;
-
-        for (i = 0; i < MAX_LOOP_DEVICES; i++) {
-                sprintf(l_device, "%s%d", loop_base, i);
-                if (access(l_device, F_OK | R_OK)) 
-                        break;
-
-                sprintf(cmd, "losetup %s", l_device);
-                ret = run_command(cmd, cmd_out);
-                /* losetup gets 0 for set-up device */
-                if (!ret) {
-                        loop_file = strrchr(cmd_out[0], '(') + 1;
-                        if (!strncmp(loop_file, file, strlen(file))){
-                                strcpy(loop_device, l_device);
-                                return 1;
-                        }
-                }
-        }
-        return 0;
-}
-
-/* Setup file in first unused loop_device */
+/* Setup a file in the first unused loop_device */
 int setup_loop(char* file, char* loop_device)
 {
         int i,ret = 0;
@@ -220,11 +195,11 @@ int setup_loop(char* file, char* loop_device)
                 }
         }
         
-        fprintf(stderr,"out of loop device\n");
+        fprintf(stderr,"out of loop devices!\n");
         return 1;
 }
 
-
+/* Determine if a device is a block device (as opposed to a file) */
 int is_block(char* devname)
 {
         struct stat st;
@@ -241,7 +216,7 @@ int is_block(char* devname)
         return S_ISBLK(st.st_mode);
 }
 
-/* get the devsize from /proc/partitions with the major and minor number */
+/* Get the devsize from /proc/partitions with the major and minor number */
 int device_size_proc(char* device) 
 {
         int major,minor,i,ret;
@@ -272,7 +247,8 @@ int device_size_proc(char* device)
         return 0; //FIXME : no entries in /proc/partitions
 }
 
-int write_local_files(struct lustre_disk_data *data)
+/* Write the server config files */
+int write_local_files(struct mkfs_opts *mop)
 {
         struct lr_server_data lsd;
         char mntpt[] = "/tmp/mntXXXXXX";
@@ -280,50 +256,39 @@ int write_local_files(struct lustre_disk_data *data)
         FILE *filep;
         int ret = 0;
 
-        /* mount this device temporarily as ext3 in order to write this file */
+        /* Mount this device temporarily as ext3 in order to write this file */
         if (!mkdtemp(mntpt)) {
                 fprintf(stderr, "Can't create temp mount point %s: %s\n",
                         mntpt, strerror(errno));
                 return errno;
         }
 
-        if (data->flags & MO_IS_LOOP)
-                sprintf(cmd, "mount -o loop %s %s", data->device, mntpt);
+        if (mop->mo_flags & MO_IS_LOOP)
+                sprintf(cmd, "mount -o loop %s %s", mop->mo_device, mntpt);
         else
-                sprintf(cmd, "mount -t ext3 %s %s", data->device, mntpt);
+                sprintf(cmd, "mount -t ext3 %s %s", mop->mo_device, mntpt);
         ret = run_command(cmd, cmd_out);
         if (ret) {
-                fprintf(stderr, "Unable to mount %s\n",
-                        data->device);
+                fprintf(stderr, "Unable to mount %s\n", mop->mo_device);
                 goto out_rmdir;
         }
 
-        /* Save the disk mount options into a file.  llmount must pre-read
-           this file to get the real mount options. I suppose you could try
-           to parse this out of the *-conf logs instead, but bleah. */
-        sprintf(filepnm, "%s/%s", mntpt, MOUNTOPTS_FILE_NAME);
+        /* Save the persistent mount data into a file. Lustre must pre-read
+           this file to get the real mount options. */
+        sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
         filep = fopen(filepnm, "w");
         if (!filep) {
                 fprintf(stderr,"Unable to create %s file\n", filepnm);
                 goto out_umnt;
         }
-
-        ret = fprintf(filep, "%x\n", data->magic);
-        if (ret <= 0) {
-                fprintf(stderr, "Can't write options file (%d)\n",
-                        ferror(filep));
-                goto out_close;
-        }
-        fprintf(filep, "%s\n", data->fs_type_string);
-        fprintf(filep, "%s\n", data->mountfsopts);
-        fprintf(filep, "%x\n", data->disk_type);
-        fwrite(&data->mgt.host, sizeof(data->mgt.host), 1, filep);
+        fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
         fclose(filep);
         
-        if IS_MDS(data) {
-                uint32_t stripe_size = data->u.mds.stripe_size;
-                uint32_t stripe_count = data->u.mds.stripe_count;
-                uint32_t stripe_pattern = data->u.mds.stripe_pattern;
+        /* FIXME this info should be part of the startup llogs */
+        if (IS_MDT(mop->mo_ldd)) {
+                uint32_t stripe_size = mop->mo_stripe_sz;
+                uint32_t stripe_count = mop->mo_stripe_count;
+                uint32_t stripe_pattern = mop->mo_stripe_pattern;
                 sprintf(filepnm, "%s/%s", mntpt, STRIPE_FILE);
                 filep = fopen(filepnm, "w");
                 if (!filep) {
@@ -352,7 +317,8 @@ int write_local_files(struct lustre_disk_data *data)
                 goto out_umnt;
         }
         memset(&lsd, 0, sizeof(lsd));
-        strncpy(lsd.lsd_uuid, data->obduuid, sizeof(lsd.lsd_uuid));
+        strncpy(lsd.lsd_uuid, mop->mo_ldd.ldd_svname, sizeof(lsd.lsd_uuid));
+        lsd.lsd_index = mop->mo_index;
         fwrite(&lsd, sizeof(lsd), 1, filep);
         ret = 0;
 
@@ -366,78 +332,91 @@ out_rmdir:
         return ret;
 }
 
-/* build fs according to type 
-   FIXME:dangerous */
-int make_lustre_backfs(struct lustre_disk_data *data)
+/* Build fs according to type */
+int make_lustre_backfs(struct mkfs_opts *mop)
 {
         int i,ret=0;
         int block_count = 0;
         char mkfs_cmd[256];
         char buf[40];
 
-        if (data->device_size != 0) {
-                if (data->device_size < 8096){
+        if (mop->mo_device_sz != 0) {
+                if (mop->mo_device_sz < 8096){
                         fprintf(stderr, "size of filesystem must be larger "
                                 "than 8MB, but is set to %dKB\n",
-                                data->device_size);
+                                mop->mo_device_sz);
                         return EINVAL;
                 }
-                block_count = data->device_size / 4; /* block size is 4096 */
+                block_count = mop->mo_device_sz / 4; /* block size is 4096 */
         }       
         
-        //FIXME: no ext2 here 
-        if ((data->fs_type == FS_EXT3) || (data->fs_type == FS_LDISKFS)) { 
-                if (data->device_size == 0){
-                        sprintf(cmd, "sfdisk -s %s", data->device);
+        if ((mop->ldd.ldd_fs_type == LDD_FS_TYPE_EXT3) ||
+            (mop->ldd.ldd_fs_type == LDD_FS_TYPE_LDISKFS)) { 
+                long device_sz = mop->mo_device_sz;
+                long inode_sz = 0;
+                if (device_sz == 0){
+                        sprintf(cmd, "sfdisk -s %s", mop->mo_device);
                         ret = run_command(cmd, cmd_out);
                         if (ret == 0)
-                                data->device_size = atol(cmd_out[0]);
+                                device_sz = atol(cmd_out[0]);
                         else 
-                                data->device_size =
-                                device_size_proc(data->device);
-                }                                
-                if (data->journal_size == 0) {    //FIXME: kick the jdev in prototype 
-                        if (data->device_size > 1024 * 1024) 
-                                data->journal_size = (data->device_size / 
-                                                      102400) * 4;
-                        if (data->journal_size > 400)
-                                data->journal_size = 400;
-                } 
-                if (data->journal_size > 0) {
-                        sprintf(buf, " -J size=%d", data->journal_size);
-                        strcat(data->mkfsopts, buf);
+                                device_sz = device_size_proc(mop->mo_device);
+                }           
+
+                if (strstr(mop->mo_mkfsopts, "-J" == NULL) {
+                        long journal_sz = 0;
+                        if (device_sz > 1024 * 1024) 
+                                journal_sz = (device_sz / 102400) * 4;
+                        if (journal_sz > 400)
+                                journal_sz = 400;
+                        if (journal_sz) {
+                                sprintf(buf, " -J size=%d", journal_sz);
+                                strcat(mop->mo_mkfsopts, buf);
+                        }
                 }
-                if (data->inode_size > 0) {
-                        sprintf(buf, " -i %d ", data->inode_size);
-                        strcat(data->mkfsopts, buf);
+
+                /* The larger the bytes-per-inode ratio, the fewer inodes
+                   will  be  created. */
+                if (mop->mo_stripe_count > 77)
+                        inode_sz = 4096;
+                else if (mop->mo_stripe_count > 35)
+                        inode_sz = 2048;
+                else if (IS_MDT(mop->mo_ldd)) 
+                        inode_sz = 1024;
+                else if ((IS_OST(mop->mo_ldd) && (device_sz > 1000000) 
+                        inode_sz = 16384;
+                if (inode_sz > 0) {
+                        sprintf(buf, " -i %d", mop->inode_size);
+                        strcat(mop->mkfsopts, buf);
                 }
 
-                sprintf(mkfs_cmd, "mkfs.ext2 -j -b 4096 -L %s ", data->obdname);
-                //FIXME: losing the jdev in this phases 
-        } else if (data->fs_type == FS_REISERFS) {
-                if (data->journal_size > 0) {
-                        sprintf(buf, " --journal_size %d", data->journal_size);
-                        strcat(data->mkfsopts, buf);
+                sprintf(mkfs_cmd, "mkfs.ext2 -j -b 4096 -L %s ",
+                        mop->mo_ldd.ldd_svname);
+
+        } else if (mop->ldd.ldd_fs_type == LDD_FS_TYPE_REISERFS) {
+                long journal_sz = 0;
+                if (journal_sz > 0) { /* FIXME */
+                        sprintf(buf, " --journal_size %d", journal_sz);
+                        strcat(mop->mkfsopts, buf);
                 }
                 sprintf(mkfs_cmd, "mkreiserfs -ff ");
+
         } else {
                 fprintf(stderr,"unsupported fs type: %s\n",
-                        data->fs_type_string);
+                        mop->fs_type_string);
                 return EINVAL;
         }
 
         vprint("formatting backing filesystem %s on %s\n",
-               data->fs_type_string, data->device);
-        vprint("\tdevice label %s\n", data->fsname);
-        vprint("\tdevice size  %dMB\n", data->device_size / 1024);
-        vprint("\tjournal size %d\n", data->journal_size);
-        vprint("\tinode size   %d\n", data->inode_size);
+               mop->mo_mount_type_string, mop->mo_device);
+        vprint("\tdevice name  %s\n", mop->mo_ldd.ldd_svname);
         vprint("\t4k blocks    %d\n", block_count);
-        vprint("\toptions      %s\n", data->mkfsopts);
+        vprint("\toptions      %s\n", mop->mo_mkfsopts);
 
         /* mkfs_cmd's trailing space is important! */
-        strcat(mkfs_cmd, data->mkfsopts);
-        strcat(mkfs_cmd, data->device);
+        strcat(mkfs_cmd, mop->mo_mkfsopts);
+        strcat(mkfs_cmd, " ");
+        strcat(mkfs_cmd, mop->mo_device);
         if (block_count != 0) {
                 sprintf(buf, " %d", block_count);
                 strcat(mkfs_cmd, buf);
@@ -446,7 +425,7 @@ int make_lustre_backfs(struct lustre_disk_data *data)
         vprint("mkfs_cmd = %s\n", mkfs_cmd);
         ret = run_command(mkfs_cmd, cmd_out);
         if (ret != 0) {
-                fprintf(stderr, "Unable to build fs: %s \n", data->device);
+                fprintf(stderr, "Unable to build fs: %s \n", mop->mo_device);
                 for (i = 0; i < 32; i++) {
                         if (strlen(cmd_out[i]) == 0)
                                 break;
@@ -455,12 +434,15 @@ int make_lustre_backfs(struct lustre_disk_data *data)
                 return EIO;
         }
 
-        if ((data->fs_type == FS_EXT3) || (data->fs_type == FS_LDISKFS)) { 
-                sprintf(cmd, "tune2fs -O dir_index %s", data->device);
+        /* Enable hashed b-tree directory lookup in large dirs 
+           FIXME MDT only? */
+        if ((mop->ldd.ldd_fs_type == LDD_FS_TYPE_EXT3) ||
+            (mop->ldd.ldd_fs_type == LDD_FS_TYPE_LDISKFS)) { 
+                sprintf(cmd, "tune2fs -O dir_index %s", mop->mo_device);
                 ret = run_command(cmd, cmd_out);
                 if (ret) {
                         fprintf(stderr,"Unable to enable htree: %s\n",
-                                data->device);
+                                mop->device);
                         exit(4);
                 }
         }
@@ -468,34 +450,26 @@ int make_lustre_backfs(struct lustre_disk_data *data)
         return ret;
 }
 
-int setup_loop_device(struct lustre_disk_data *data)
+int setup_loop_device(struct mkfs_opts *mop)
 {
         char loop_device[20] = "";
         int ret = 0;
        
         init_loop_base();
 
-#if 0  /* Do we need this? */
-        if (find_assigned_loop(data->device, loop_device)) {
-                fprintf(stderr,"WARNING file %s already mapped to %s\n",
-                        data->device, loop_device);
-                return 1;
-        }
-#endif
-        
         sprintf(cmd, "dd if=/dev/zero bs=1k count=0 seek=%d of=%s", 
-                data->device_size, data->device);
+                mop->mo_device_sz, mop->mo_device);
         ret = run_command(cmd, cmd_out);
         if (ret != 0){
                 fprintf(stderr, "Unable to create backing store: %d\n", ret);
                 return ret;
         }
 
-        ret = setup_loop(data->device, loop_device);
+        ret = setup_loop(mop->mo_device, loop_device);
         
         if (ret == 0)
                 /* Our device is now the loop device, not the file name */
-                strcpy(data->device, loop_device);
+                strcpy(mop->mo_device, loop_device);
         else
                 fprintf(stderr, "Loop device setup failed %d\n", ret);
                 
@@ -569,7 +543,7 @@ static int _do_jt(int (*cmd)(int argc, char **argv), char *cmd_name, ...)
 #define do_jt(cmd, a...)  if ((ret = _do_jt(cmd, #cmd, ## a))) goto out 
 #define do_jt_noret(cmd, a...)  _do_jt(cmd, #cmd, ## a) 
 
-int lustre_log_setup(struct lustre_disk_data *data)
+int lustre_log_setup(struct mkfs_opts *mop)
 {
         char confname[] = "confobd";
         char name[128];
@@ -588,22 +562,22 @@ int lustre_log_setup(struct lustre_disk_data *data)
         ret = do_jt_noret(jt_lcfg_device, "cfg_device", confname, 0);
         if (ret)
                 return ENODEV;
-        do_jt(jt_lcfg_setup,  "setup", data->device,  data->fs_type_string,
-              data->mountfsopts, 0);
+        do_jt(jt_lcfg_setup,  "setup", mop->device,  mop->fs_type_string,
+              mop->mountfsopts, 0);
         do_jt(jt_obd_device,  "device", "confobd", 0);
 
-        sprintf(name, "%s-conf", data->obdname);
+        sprintf(name, "%s-conf", mop->mo_ldd.ldd_svname);
 
-        if (IS_OST(data)) {
+        if (IS_OST(mop->mo_ldd)) {
                 do_jt(jt_cfg_clear_log, "clear_log", name, 0);
                 do_jt(jt_cfg_record,    "record", name, 0);
-                do_jt(jt_lcfg_attach, "attach", "obdfilter", data->obdname,
-                      data->obduuid, 0);
-                do_jt(jt_lcfg_device, "cfg_device", data->obdname, 0);
-                do_jt(jt_lcfg_setup,  "setup", data->device, 
-                      data->fs_type_string,
-                      "n", /*data->u.ost.host.failover_addr*/
-                      data->mountfsopts, 0);
+                do_jt(jt_lcfg_attach, "attach", "obdfilter", mop->mo_ldd.ldd_svname,
+                      mop->obduuid, 0);
+                do_jt(jt_lcfg_device, "cfg_device", mop->mo_ldd.ldd_svname, 0);
+                do_jt(jt_lcfg_setup,  "setup", mop->device, 
+                      mop->fs_type_string,
+                      "n", /*mop->u.ost.host.failover_addr*/
+                      mop->mountfsopts, 0);
                 do_jt(jt_cfg_endrecord, "endrecord", 0);
                 do_jt(jt_cfg_dump_log,  "dump_log", name, 0);
 
@@ -615,26 +589,26 @@ int lustre_log_setup(struct lustre_disk_data *data)
                 do_jt(jt_cfg_endrecord, "endrecord", 0);
         }
 
-        if (IS_MDS(data)) {
+        if (IS_MDT(mop->mo_ldd)) {
                 /* write mds-conf log */
                 do_jt(jt_cfg_clear_log, "clear_log", name, 0);
                 do_jt(jt_cfg_record,    "record", name, 0);
                 do_jt(jt_lcfg_attach,   "attach", "mdt", "MDT", "MDT_UUID", 0);
                 do_jt(jt_lcfg_device,   "cfg_device", "MDT", 0);
                 do_jt(jt_lcfg_setup,    "setup", 0);
-                do_jt(jt_lcfg_attach,   "attach", "mds", data->obdname,
-                      data->obduuid, 0);
-                do_jt(jt_lcfg_device,   "cfg_device", data->obdname, 0);
-                do_jt(jt_lcfg_setup,    "setup", data->device,
-                      data->fs_type_string,
-                      data->obdname, data->mountfsopts, 0);
+                do_jt(jt_lcfg_attach,   "attach", "mds", mop->mo_ldd.ldd_svname,
+                      mop->obduuid, 0);
+                do_jt(jt_lcfg_device,   "cfg_device", mop->mo_ldd.ldd_svname, 0);
+                do_jt(jt_lcfg_setup,    "setup", mop->device,
+                      mop->fs_type_string,
+                      mop->mo_ldd.ldd_svname, mop->mountfsopts, 0);
                 do_jt(jt_cfg_endrecord, "endrecord", 0);
                 
                 /* TEMPORARY - needs to be moved into mds_setup for 1st mount */
 #if 0
                 /* write mds startup log */
-                do_jt(jt_cfg_clear_log,  "clear_log", data->obdname, 0);
-                do_jt(jt_cfg_record,     "record", data->obdname, 0);
+                do_jt(jt_cfg_clear_log,  "clear_log", mop->mo_ldd.ldd_svname, 0);
+                do_jt(jt_cfg_record,     "record", mop->mo_ldd.ldd_svname, 0);
                 /*add_uuid NID_uml2_UUID uml2 tcp
                   network tcp
                   add_peer uml2 uml2 988*/
@@ -644,11 +618,11 @@ int lustre_log_setup(struct lustre_disk_data *data)
                 */
                 do_jt(jt_lcfg_attach,    "attach", "lov", "lov_c", "lov_c_uuid", 0);
                 do_jt(jt_lcfg_lov_setup, "lov_setup", "lovA_uuid",
-                      "0" /*data->u.mds.stripe_count*/,
-                      "1048576" /*data->u.mds.stripe_size*/,
+                      "0" /*mop->u.mds.stripe_count*/,
+                      "1048576" /*mop->u.mds.stripe_size*/,
                       "0" /* stripe_off FIXME */,
                       "0" /* stripe_pattern */, 0);
-                do_jt(jt_lcfg_mount_option,"mount_option", data->obdname, "lov_c", 0);
+                do_jt(jt_lcfg_mount_option,"mount_option", mop->mo_ldd.ldd_svname, "lov_c", 0);
                 do_jt(jt_cfg_endrecord, "endrecord", 0);
 #endif        
         }
@@ -683,54 +657,54 @@ static int load_module(char *module_name)
         return rc;
 }
 
-/* Make the mds/ost obd name based on the filesystem name */
-static void make_obdname(struct lustre_disk_data *data)
+/* Make the mdt/ost server obd name based on the filesystem name */
+static void make_sv_name(struct mkfs_opts *mop)
 {
-        int maxlen = sizeof(data->obdname) - 1;
-
-        if (IS_MDS(data)) {
-                snprintf(data->obdname, maxlen, "MDS%s", data->fsname);
-        } else if (IS_OST(data)) {
-                char number[5];
-                snprintf(data->obdname, maxlen, "OST%s", data->fsname);
-                /* FIXME if we're not given an index, we might
-                   have to change our name later -- can't have two ost's
-                   with the same name. Rewrite ost log?? */
-                snprintf(number, 5, "%04x", data->u.ost.stripe_index);
-                /* Truncate label if need be to get the whole index number */
-                data->obdname[maxlen - 4] = 0;
-                strcat(data->obdname, number);
-        } else {
-                snprintf(data->obdname, maxlen, "MGT%s", data->fsname);
-        }         
+        /* FIXME if we're not given an index, we have to change our name
+           later -- can't have two servers with the same name. 
+           So rewrite ost log and last_rcvd, or we need to talk
+           to MGMT now to get index # */
 
-        /* This uuid must match the client's concept of the server uuid in
-           confobd_update_logs*/
-        snprintf(data->obduuid, sizeof(data->obduuid), "%s_UUID", data->obdname); 
+        if (IS_MGMT(mop->mo_ldd)) {
+                sprintf(mop->mo_ldd.ldd_svname, "MGMT");
+        } else {
+                sprintf(mop->mo_ldd.ldd_svname, "%.8s-%s%04x",
+                        mop->mo_ldd.ldd_fsname,
+                        IS_MDT(mop->mo_ldd) ? "MDT" : "OST",  
+                        mop->mo_index);
 }
 
-void set_defaults(struct lustre_disk_data *data)
+void set_defaults(struct mkfs_opts *mop)
 {
-        data->magic = LDD_MAGIC;
+        mop->mo_ldd.ldd_magic = LDD_MAGIC;
 
         if (get_os_version() == 24) { 
-                data->fs_type = FS_EXT3;
-                strcpy(data->fs_type_string, "ext3");
+                mop->ldd.ldd_fs_type = LDD_FS_TYPE_EXT3;
+                strcpy(mop->fs_type_string, "ext3");
         } else {
-                data->fs_type = FS_LDISKFS;
-                strcpy(data->fs_type_string, "ldiskfs");
+                mop->ldd.ldd_fs_type = LDD_FS_TYPE_LDISKFS;
+                strcpy(mop->fs_type_string, "ldiskfs");
         }
         
-        strcpy(data->fsname, "lustre");
+        strcpy(mop->mo_ldd.ldd_fsname, "lustre");
+        mop->mo_index = -1;
+}
+
+static inline badopt(char opt, char *type)
+{
+        fprintf(stderr, "%s: '%c' only valid for %s\n",
+                progname, opt, type);
+        usage(stderr);
+        exit(1);
 }
 
 int main(int argc , char *const argv[])
 {
-        struct lustre_disk_data data;
+        struct mkfs_opts mop;
         static struct option long_opt[] = {
                 {"help", 0, 0, 'h'},
                 {"fsname",1, 0, 'n'},
-                {"mgtnode", 1, 0, 'm'},
+                {"mgmtnode", 1, 0, 'm'},
                 {"failover", 1, 0, 'f'},
                 {"device_size", 1, 0, 'd'},
                 {"stripe_count", 1, 0, 'c'},
@@ -744,24 +718,21 @@ int main(int argc , char *const argv[])
         char opt;
         int  ret = 0;
 
+        progname = argv[0];
         if (argc < 3) 
                 usage(stderr);
            
-        progname = argv[0];
-        memset(&data, 0, sizeof(data));
-        set_defaults(&data);
-
-        if (!strcasecmp(argv[1], "mds")){
-                data.disk_type |= MDS_DISK_TYPE;
-                data.disk_type |= MGT_DISK_TYPE;
-        }
-        else if (!strcasecmp(argv[1], "ost")) {
-                data.disk_type |= OST_DISK_TYPE;
-                data.u.ost.stripe_index = -1;
-        } else if (!strcasecmp(argv[1], "mgt")) {
-                data.disk_type |= MGT_DISK_TYPE;
+        memset(&mop, 0, sizeof(mop));
+        set_defaults(&mop);
+
+        if (strcasecmp(argv[1], "mdt") == 0) {
+                mop.mo_ldd.ldd_flags |= LDD_SV_TYPE_MDT;
+        } else if (strcasecmp(argv[1], "ost") == 0) {
+                mop.mo_ldd.ldd_flags |= LDD_SV_TYPE_OST;
+        } else if (strcasecmp(argv[1], "mgmt") == 0) {
+                mop.mo_ldd.ldd_flags |= LDD_SV_TYPE_MGT;
         } else {
-                fprintf(stderr, "%s: need to know disk type :{mds,ost,mgt}\n",
+                fprintf(stderr, "%s: must set server type :{mdt,ost,mgmt}\n",
                                 progname);
                 usage(stderr);
         }
@@ -774,82 +745,60 @@ int main(int argc , char *const argv[])
                         usage(stdout);
                         break;
                 case 'n':
+                        if (IS_MGMT(&mop.mo_ldd))
+                                badopt(opt, "MDT,OST");
+                        if (strlen(optarg) > 8) {
+                                fprintf(stderr, "%s: filesystem name must be "
+                                        "<= 8 chars\n", progname);
+                                exit(1);
+                        }
                         if (optarg[0] != 0) 
-                                strncpy(data.fsname, optarg, 
-                                        sizeof(data.fsname) - 1);
+                                strncpy(mop.mo_ldd.ldd_fsname, optarg, 
+                                        sizeof(mop.mo_ldd.ldd_fsname) - 1);
                         break;
                 case 'm':
-                        if IS_MGT(&data){
-                                fprintf(stderr, "%s: wrong option for mgt"
-                                        "%s \n", progname, optarg);
-                                usage(stderr);
-                        }
-                        if IS_MDS(&data){
-                                /* FIXME after the network is done */  
-                                strcpy(data.u.mds.mgt_node.host, optarg);
-                                data.disk_type &= ~MGT_DISK_TYPE;
-                        }
-                        if IS_OST(&data)
-                                strcpy(data.u.ost.mgt_node.host,optarg);
+                        if (IS_MGMT(&mop.mo_ldd))
+                                badopt(opt, "MDT,OST");
+                        mop.mo_ldd->ldd_mgmt.primary = libcfs_str2nid(optarg);
                         break;
-                case 'f': /* FIXME after the network is done */  
-                        if IS_MDS(&data)
-                                strcpy(data.u.mds.host.failover_addr,optarg);
-                        if IS_OST(&data)
-                                strcpy(data.u.ost.host.failover_addr,optarg);
-                        if IS_MGT(&data)
-                                strcpy(data.mgt.host.failover_addr,optarg);
+                case 'f':
+                        mop.mo_failover_nid = libcfs_str2nid(optarg);
                         break;
                 case 'd':
-                        data.device_size = atol (optarg); 
+                        mop.mo_device_sz = atol(optarg); 
                         break;
                 case 'c':
-                        if IS_MDS(&data) {
+                        if (IS_MDT(&mop.mo_ldd)) {
                                 int stripe_count = atol(optarg);
-                                data.u.mds.stripe_count = stripe_count;
-                                if (stripe_count > 77)
-                                        data.inode_size = 4096;
-                                else if (stripe_count > 35)
-                                        data.inode_size = 2048;
-                                else 
-                                        data.inode_size = 1024;
-                        }
-                        if IS_MGT(&data)
-                                data.mgt.stripe_count = atol(optarg);
-                        else {
-                                fprintf(stderr,"%s: wrong option for ost %s\n",
-                                        progname,optarg);
-                                usage(stderr);
+                                mop.mo_stripe_count = stripe_count;
+                        } else {
+                                badopt(opt, "MDT");
                         }
                         break;
                 case 's':
-                        if IS_MDS(&data)
-                                data.u.mds.stripe_size = atol(optarg)*1024;
-                        else {
-                                fprintf(stderr, "%s: wrong option for mgt%s\n",
-                                        progname,optarg);
-                                usage(stderr);
-                        }
+                        if (IS_MDT(&mop.mo_ldd)) 
+                                mop.mo_stripe_sz = atol(optarg) * 1024;
+                        else 
+                                badopt(opt, "MDT");
                         break;
                 case 'i':
-                        if IS_OST(&data)
-                                data.u.ost.stripe_index = atol(optarg);
-                        else {
-                                fprintf(stderr, 
-                                        "%s: wrong option for mds/mgt%s\n",
-                                        progname,optarg);
-                                usage(stderr);
-                        }
+                        if (IS_MGMT(&mop.mo_ldd))
+                                badopt(opt, "MDT,OST");
+                        else
+                                mop.mo_index = atol(optarg);
                         break;
                 case 'S':
-                        data.fs_type = FS_SMFS;
-                        strcpy(data.fs_type_string, "smfs");
-                        strncpy(data.mkfsopts, optarg, sizeof(data.mkfsopts)-1);
+                        mop.mo_ldd.ldd_fs_type = LDD_FS_TYPE_SMFS;
+                        strcpy(mop.fs_type_string, "smfs");
+                        strncpy(mop.mkfsopts, optarg, sizeof(mop.mkfsopts) - 1);
                         break;
                 case 'e':
-                        data.fs_type = FS_EXT3;
-                        strcpy(data.fs_type_string, "ext3");
-                        strncpy(data.mkfsopts, optarg, sizeof(data.mkfsopts)-1);
+                        if ((mop.mo_ldd.ldd_fs_type == LDD_FS_TYPE_LDISKFS) ||
+                            (mop.mo_ldd.ldd_fs_type == LDD_FS_TYPE_EXT3))
+                                strncpy(mop.mkfsopts, optarg, 
+                                        sizeof(mop.mkfsopts) - 1);
+                        else
+                                badopt(opt, "ext3,ldiskfs");
                         break;
                 default:
                         fprintf(stderr, "%s: unknown option '%c'\n",
@@ -859,65 +808,60 @@ int main(int argc , char *const argv[])
                 }
         }//while
 
-        strcpy(data.device, argv[optind]);
+        strcpy(mop.mo_device, argv[optind]);
 
-        if (IS_OST(&data) && (data.u.ost.stripe_index == -1)) {
-                fprintf(stderr, "You must set --stripe_index=?? for now.\n");
-                return EINVAL;
-        }
+        if (IS_MDT(&mop.mo_ldd) && (mop.mo_stripe_sz == 0))
+                mop.mo_stripe_sz = 1024 * 1024;
         
-        if (IS_MDS(&data) && (data.u.mds.stripe_size == 0))
-                data.u.mds.stripe_size = 1024 * 1024;
-        
-        /* These are the mount options stored in the log files.  
-           llmount must use the same options to start the confobd. */
-        if ((data.fs_type == FS_EXT3) || (data.fs_type == FS_LDISKFS)) {
-                sprintf(data.mountfsopts, "errors=remount-ro");
-                if (IS_MDS(&data))
-                        strcat(data.mountfsopts, ",iopen_nopriv");
-                if ((IS_OST(&data)) && (get_os_version() == 24))
-                        strcat(data.mountfsopts, ",asyncdel");
-        } else if (data.fs_type == FS_SMFS) {
-                sprintf(data.mountfsopts, "type=ext3,dev=%s", data.device);
+        /* These are the permanent mount options. */ 
+        if ((mop.mo_ldd.ldd_fs_type == LDD_FS_TYPE_EXT3) ||
+            (mop.mo_ldd.ldd_fs_type == LDD_FS_TYPE_LDISKFS)) {
+                sprintf(mop.mo_ldd.ldd_mountfsopts, "errors=remount-ro");
+                if (IS_MDT(&mop.mo_ldd))
+                        strcat(mop.mo_ldd.ldd_mountfsopts, ",iopen_nopriv");
+                if ((IS_OST(&mop.mo_ldd)) && (get_os_version() == 24))
+                        strcat(mmop.mo_ldd.ldd_mountfsopts, ",asyncdel");
+        } else if (mop.mo_ldd.ldd_fs_type == LDD_FS_TYPE_SMFS) {
+                sprintf(mop.mo_ldd.ldd_mountfsopts, "type=ext3,dev=%s",
+                        mop.mo_device);
         } else {
                 fprintf(stderr, "%s: unknown fs type %d '%s'\n",
-                        progname, data.fs_type, data.fs_type_string);
+                        progname, mop.mo_ldd.ldd_fs_type,
+                        mop.mo_mount_type_string);
                 return EINVAL;
         }
     
-        make_obdname(&data);
-
-        if (load_module("confobd"))
-                return ENOSYS;
+        make_obdname(&mop);
 
-        if ((data.fs_type == FS_SMFS) || !is_block(data.device)) {
-                data.flags |= MO_IS_LOOP;
-                ret = setup_loop_device(&data);
+        if ((mop->mo_ldd.ldd_fs_type == LDD_FS_TYPE_SMFS) ||
+            !is_block(mop.device)) {
+                mop.flags |= MO_IS_LOOP;
+                ret = setup_loop_device(&mop);
                 if (ret) 
                         return ret;
         }
                 
-        ret = make_lustre_backfs(&data);
+        ret = make_lustre_backfs(&mop);
         if (ret != 0) {
               fprintf(stderr, "mkfs failed\n");
               goto out;
         }
         
-        ret = write_local_files(&data);
+        ret = write_local_files(&mop);
         if (ret != 0) {
               fprintf(stderr, "failed to write local files\n");
               goto out;
         }
 
-        ret = lustre_log_setup(&data);
+        ret = lustre_log_setup(&mop);
         if (ret != 0) {
               fprintf(stderr, "failed to write setup logs\n");
               goto out;
         }
 
 out:
-        if (data.flags & MO_IS_LOOP) {
-                sprintf(cmd, "losetup -d %s", data.device);
+        if (mop.mo_flags & MO_IS_LOOP) {
+                sprintf(cmd, "losetup -d %s", mop.mo_device);
                 ret = run_command(cmd, cmd_out);
         }