X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmkfs_lustre.c;h=87ba9b65440ef7c56f8fbd6d67166b697bc80985;hp=a333ac684d3c3fe1137d3ebc6b86031d25e5f6cb;hb=848f9e20320cb7c01eaf7f1b5c27f5efd54e4818;hpb=7c5643233b91cf4729878cd8792255e5d12f481e diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index a333ac6..87ba9b6 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, Whamcloud, Inc. + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -47,6 +47,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include "mount_utils.h" #include #include #include @@ -64,20 +65,10 @@ #include #include -#ifdef __linux__ -/* libcfs.h is not really needed here, but on SLES10/PPC, fs.h includes idr.h - * which requires BITS_PER_LONG to be defined */ -#include -#ifndef BLKGETSIZE64 -#include /* for BLKGETSIZE64 */ -#endif -#include -#endif #include #include #include #include -#include "mount_utils.h" #ifndef PATH_MAX #define PATH_MAX 4096 @@ -88,49 +79,92 @@ int verbose = 1; static int print_only = 0; static int upgrade_to_18 = 0; +#ifdef HAVE_LDISKFS_OSD +#define FSLIST_LDISKFS "ldiskfs" +#define HAVE_FSLIST +#else + #define FSLIST_LDISKFS "" +#endif /* HAVE_LDISKFS_OSD */ +#ifdef HAVE_ZFS_OSD + #ifdef HAVE_FSLIST + #define FSLIST_ZFS "|zfs" + #else + #define FSLIST_ZFS "zfs" + #define HAVE_FSLIST + #endif +#else + #define FSLIST_ZFS "" +#endif /* HAVE_ZFS_OSD */ + +#ifndef HAVE_FSLIST + #error "no backing OSD types (ldiskfs or ZFS) are configured" +#endif + +#define FSLIST FSLIST_LDISKFS FSLIST_ZFS + void usage(FILE *out) { - fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); - fprintf(out, "usage: %s [options] \n", progname); - fprintf(out, - "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" - "\ttarget types:\n" - "\t\t--ost: object storage, mutually exclusive with mdt,mgs\n" - "\t\t--mdt: metadata storage, mutually exclusive with ost\n" - "\t\t--mgs: configuration management service - one per site\n" - "\toptions (in order of popularity):\n" - "\t\t--mgsnode=[,<...>] : NID(s) of a remote mgs node\n" - "\t\t\trequired for all targets other than the mgs node\n" - "\t\t--fsname= : default is 'lustre'\n" - "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" - "\t\t\tcannot be used with --servicenode\n" - "\t\t--servicenode=[,<...>] : NID(s) of all service partners\n" - "\t\t\ttreat all nodes as equal service node, cannot be used with --failnode\n" - "\t\t--param = : set a permanent parameter\n" - "\t\t\te.g. --param sys.timeout=40\n" - "\t\t\t --param lov.stripesize=2M\n" - "\t\t--index=#N : target index (i.e. ost index within lov)\n" - "\t\t--comment=: arbitrary string (%d bytes)\n" - "\t\t--mountfsoptions= : permanent mount options\n" - "\t\t--network=[,<...>] : restrict OST/MDT to network(s)\n" + fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); + fprintf(out, "usage: %s [--backfstype="FSLIST"] " + "--fsname=\n" + "\t--index= [options] \n", progname); +#ifdef HAVE_ZFS_OSD + fprintf(out, "usage: %s --backfstype=zfs " + "--fsname= [options]\n" + "\t/\n" + "\t[[] [ ...] [vdev type>] ...]\n", + progname); +#endif + fprintf(out, + "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" +#ifdef HAVE_ZFS_OSD + "\t: name of ZFS pool where target is created " + "(e.g. tank)\n" + "\t: name of new dataset, must be unique within " + "pool (e.g. ost1)\n" + "\t: type of vdev (mirror, raidz, raidz2, spare, " + "cache, log)\n" +#endif + "\n" + "\ttarget types:\n" + "\t\t--mgs: configuration management service\n" + "\t\t--mdt: metadata storage, mutually exclusive with ost\n" + "\t\t--ost: object storage, mutually exclusive with mdt, mgs\n" + "\toptions (in order of popularity):\n" + "\t\t--index=#N: numerical target index (0..N)\n" + "\t\t\trequired for all targets other than the MGS\n" + "\t\t--fsname=<8_char_filesystem_name>: fs targets belong to\n" + "\t\t\trequired for all targets other than MGS\n" + "\t\t--mgsnode=[,<...>]: NID(s) of remote MGS\n" + "\t\t\trequired for all targets other than MGS\n" + "\t\t--mountfsoptions=: permanent mount options\n" + "\t\t--failnode=[,<...>]: NID(s) of backup failover node\n" + "\t\t\tmutually exclusive with --servicenode\n" + "\t\t--servicenode=[,<...>]: NID(s) of service partners\n" + "\t\t\ttreat nodes as equal service node, mutually exclusive " + "with --failnode\n" + "\t\t--param =: set a permanent parameter\n" + "\t\t\te.g. --param sys.timeout=40\n" + "\t\t\t --param lov.stripesize=2M\n" + "\t\t--network=[,<...>]: restrict OST/MDT to network(s)\n" #ifndef TUNEFS - "\t\t--backfstype= : backing fs type (ext3, ldiskfs)\n" - "\t\t--device-size=#N(KB) : device size for loop devices\n" - "\t\t--mkfsoptions= : format options\n" - "\t\t--reformat: overwrite an existing disk\n" - "\t\t--stripe-count-hint=#N : for optimizing MDT inode size\n" - "\t\t--iam-dir: use IAM directory format, not ext3 compatible\n" + "\t\t--backfstype=: backing fs type (ext3, ldiskfs)\n" + "\t\t--device-size=#N(KB): device size for loop devices\n" + "\t\t--mkfsoptions=: format options\n" + "\t\t--reformat: overwrite an existing disk\n" + "\t\t--stripe-count-hint=#N: for optimizing MDT inode size\n" #else - "\t\t--erase-params : erase all old parameter settings\n" - "\t\t--nomgs: turn off MGS service on this MDT\n" - "\t\t--writeconf: erase all config logs for this fs.\n" + "\t\t--erase-params: erase all old parameter settings\n" + "\t\t--nomgs: turn off MGS service on this MDT\n" + "\t\t--writeconf: erase all config logs for this fs.\n" + "\t\t--quota: enable space accounting on old 2.x device.\n" #endif - "\t\t--dryrun: just report what we would do; " - "don't write to disk\n" - "\t\t--verbose : e.g. show mkfs progress\n" - "\t\t--quiet\n", - (int)sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); - return; + "\t\t--comment=: arbitrary string (%d bytes)\n" + "\t\t--dryrun: report what we would do; don't write to disk\n" + "\t\t--verbose: e.g. show mkfs progress\n" + "\t\t--quiet\n", + (int)sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); + return; } /* ==================== Lustre config functions =============*/ @@ -148,7 +182,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); - printf(" (%s%s%s%s%s%s%s%s%s%s)\n", + printf(" (%s%s%s%s%s%s%s%s%s)\n", IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", @@ -156,7 +190,6 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_VIRGIN ? "first_time ":"", ldd->ldd_flags & LDD_F_UPDATE ? "update ":"", ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"", - ldd->ldd_flags & LDD_F_IAM_DIR ? "IAM_dir_format ":"", ldd->ldd_flags & LDD_F_NO_PRIMNODE? "no_primnode ":"", ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); @@ -168,15 +201,18 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) void set_defaults(struct mkfs_opts *mop) { - mop->mo_ldd.ldd_magic = LDD_MAGIC; - mop->mo_ldd.ldd_config_ver = 1; - mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; - mop->mo_mgs_failnodes = 0; - strcpy(mop->mo_ldd.ldd_fsname, "lustre"); - mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; - - mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; - mop->mo_stripe_count = 1; + mop->mo_ldd.ldd_magic = LDD_MAGIC; + mop->mo_ldd.ldd_config_ver = 1; + mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; +#ifdef HAVE_LDISKFS_OSD + mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; +#else + mop->mo_ldd.ldd_mount_type = LDD_MT_ZFS; +#endif + mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; + mop->mo_mgs_failnodes = 0; + mop->mo_stripe_count = 1; + mop->mo_pool_vdevs = NULL; } static inline void badopt(const char *opt, char *type) @@ -209,6 +245,7 @@ static char *convert_hostnames(char *s1) sep = *s2; *s2 = '\0'; nid = libcfs_str2nid(s1); + *s2 = sep; if (nid == LNET_NID_ANY) { fprintf(stderr, "%s: Can't parse NID '%s'\n", @@ -237,7 +274,6 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, char **mountopts) { static struct option long_opt[] = { - {"iam-dir", 0, 0, 'a'}, {"backfstype", 1, 0, 'b'}, {"stripe-count-hint", 1, 0, 'c'}, {"comment", 1, 0, 'u'}, @@ -268,6 +304,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"writeconf", 0, 0, 'w'}, {"upgrade_to_18", 0, 0, 'U'}, {"network", 1, 0, 't'}, + {"quota", 0, 0, 'Q'}, {0, 0, 0, 0} }; char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrs:t:Uu:vw"; @@ -278,11 +315,6 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { switch (opt) { - case 'a': { - if (IS_MDT(&mop->mo_ldd)) - mop->mo_ldd.ldd_flags |= LDD_F_IAM_DIR; - break; - } case 'b': { int i = 0; while (i < LDD_MT_LAST) { @@ -292,6 +324,11 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } i++; } + if (i == LDD_MT_LAST) { + fprintf(stderr, "%s: invalid backend filesystem" + " type %s\n", progname, optarg); + return 1; + } break; } case 'c': @@ -470,6 +507,9 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, case 'U': upgrade_to_18 = 1; break; + case 'Q': + mop->mo_flags |= MO_QUOTA; + break; default: if (opt != '?') { fatal(); @@ -479,16 +519,20 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } }//while - /* Last arg is device */ - if (optind != argc - 1) { - fatal(); - fprintf(stderr, "Bad argument: %s\n", argv[optind]); - return EINVAL; - } - - /* single argument: */ - if (argc == 2) - ++print_only; + if (optind == argc) { + /* The user didn't specify device name */ + fatal(); + fprintf(stderr, "Not enough arguments - device name or " + "pool/dataset name not specified.\n"); + return EINVAL; + } else { + /* The device or pool/filesystem name */ + strscpy(mop->mo_device, argv[optind], sizeof(mop->mo_device)); + + /* Followed by optional vdevs */ + if (optind < argc - 1) + mop->mo_pool_vdevs = (char **) &argv[optind + 1]; + } return 0; } @@ -519,6 +563,10 @@ int main(int argc, char *const argv[]) /* device is last arg */ strscpy(mop.mo_device, argv[argc - 1], sizeof(mop.mo_device)); + ret = osd_init(); + if (ret) + return ret; + #ifdef TUNEFS /* For tunefs, we must read in the old values before parsing any new ones. */ @@ -532,14 +580,27 @@ int main(int argc, char *const argv[]) ret = ENODEV; goto out; } + mop.mo_ldd.ldd_mount_type = mount_type; - ret = read_local_files(&mop); + ret = osd_read_ldd(mop.mo_device, &mop.mo_ldd); if (ret) { fatal(); fprintf(stderr, "Failed to read previous Lustre data from %s " "(%d)\n", mop.mo_device, ret); goto out; } + mop.mo_ldd.ldd_flags &= ~(LDD_F_WRITECONF | LDD_F_VIRGIN); + + /* svname of the form lustre:OST1234 means never registered */ + ret = strlen(mop.mo_ldd.ldd_svname); + if (mop.mo_ldd.ldd_svname[ret - 8] == ':') { + mop.mo_ldd.ldd_svname[ret - 8] = '-'; + mop.mo_ldd.ldd_flags |= LDD_F_VIRGIN; + } else if (mop.mo_ldd.ldd_svname[ret - 8] == '=') { + mop.mo_ldd.ldd_svname[ret - 8] = '-'; + mop.mo_ldd.ldd_flags |= LDD_F_WRITECONF; + } + if (strstr(mop.mo_ldd.ldd_params, PARAM_MGSNODE)) mop.mo_mgs_failnodes++; @@ -567,6 +628,10 @@ int main(int argc, char *const argv[]) goto out; } + /* Stand alone MGS doesn't need a index */ + if (!IS_MDT(ldd) && IS_MGS(ldd)) + mop.mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX; + if ((mop.mo_ldd.ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPGRADE14)) == (LDD_F_NEED_INDEX | LDD_F_UPGRADE14)) { fatal(); @@ -575,6 +640,18 @@ int main(int argc, char *const argv[]) ret = EINVAL; goto out; } + + if (mop.mo_ldd.ldd_flags & LDD_F_NEED_INDEX) + fprintf(stderr, "warning: %s: for Lustre 2.4 and later, the " + "target index must be specified with --index\n", + mop.mo_device); + + /* If no index is supplied for MDT by default set index to zero */ + if (IS_MDT(ldd) && (ldd->ldd_svindex == INDEX_UNASSIGNED)) { + mop.mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX; + mop.mo_ldd.ldd_svindex = 0; + } + #if 0 /* * Comment out these 2 checks temporarily, since for multi-MDSes @@ -595,6 +672,12 @@ int main(int argc, char *const argv[]) goto out; } #endif + if ((IS_MDT(ldd) || IS_OST(ldd)) && mop.mo_ldd.ldd_fsname[0] == '\0') { + fatal(); + fprintf(stderr, "Must specify --fsname for MDT/OST device\n"); + ret = EINVAL; + goto out; + } /* These are the permanent mount options (always included) */ ret = osd_prepare_lustre(&mop, @@ -678,16 +761,31 @@ int main(int argc, char *const argv[]) } /* Format the backing filesystem */ - ret = make_lustre_backfs(&mop); + ret = osd_make_lustre(&mop); if (ret != 0) { fatal(); fprintf(stderr, "mkfs failed %d\n", ret); goto out; } +#else + /* update svname with '=' to refresh config */ + if (mop.mo_ldd.ldd_flags & LDD_F_WRITECONF) { + struct mount_opts opts; + opts.mo_ldd = mop.mo_ldd; + opts.mo_source = mop.mo_device; + (void) osd_label_lustre(&opts); + } + + /* Enable quota accounting */ + if (mop.mo_flags & MO_QUOTA) { + ret = osd_enable_quota(&mop); + goto out; + } + #endif /* Write our config files */ - ret = write_local_files(&mop); + ret = osd_write_ldd(&mop); if (ret != 0) { fatal(); fprintf(stderr, "failed to write local files\n"); @@ -696,6 +794,7 @@ int main(int argc, char *const argv[]) out: loop_cleanup(&mop); + osd_fini(); /* Fix any crazy return values from system() */ if (ret && ((ret & 255) == 0))