X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmkfs_lustre.c;h=45ad45fd09b7187e548deb55c46d6588e3c7dd32;hp=bcdfc29d947dee7544a3da70107145071ee0ee40;hb=2a9056699f3be22b0a51408564fc18ba83f3c12e;hpb=b5a8338cbc9e25e94c50f65f510008c1fbf9dca9 diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index bcdfc29..45ad45f 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -28,6 +26,8 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, Whamcloud, Inc. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -65,8 +65,8 @@ #include #ifdef __linux__ -/* libcfs.h is not really needed here, but on SLES10/PPC, fs.h includes idr.h which - * requires BITS_PER_LONG to be defined */ +/* libcfs.h is not really needed here, but on SLES10/PPC, fs.h includes idr.h + * which requires BITS_PER_LONG to be defined */ #include #ifndef BLKGETSIZE64 #include /* for BLKGETSIZE64 */ @@ -83,646 +83,97 @@ #define PATH_MAX 4096 #endif -#define MAX_LOOP_DEVICES 16 -#define L_BLOCK_SIZE 4096 -#define INDEX_UNASSIGNED 0xFFFF -#define MO_IS_LOOP 0x01 -#define MO_FORCEFORMAT 0x02 - -/* used to describe the options to format the lustre disk, not persistent */ -struct mkfs_opts { - struct lustre_disk_data mo_ldd; /* to be written in MOUNT_DATA_FILE */ - char mo_device[128]; /* disk device name */ - char mo_mkfsopts[128]; /* options to the backing-store mkfs */ - char mo_loopdev[128]; /* in case a loop dev is needed */ - __u64 mo_device_sz; /* in KB */ - int mo_stripe_count; - int mo_flags; - int mo_mgs_failnodes; -}; - char *progname; int verbose = 1; static int print_only = 0; -static int failover = 0; static int upgrade_to_18 = 0; -void usage(FILE *out) -{ - fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); - fprintf(out, "usage: %s [options] \n", progname); - fprintf(out, - "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" - "\ttarget types:\n" - "\t\t--ost: object storage, mutually exclusive with mdt,mgs\n" - "\t\t--mdt: metadata storage, mutually exclusive with ost\n" - "\t\t--mgs: configuration management service - one per site\n" - "\toptions (in order of popularity):\n" - "\t\t--mgsnode=[,<...>] : NID(s) of a remote mgs node\n" - "\t\t\trequired for all targets other than the mgs node\n" - "\t\t--fsname= : default is 'lustre'\n" - "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" - "\t\t--param = : set a permanent parameter\n" - "\t\t\te.g. --param sys.timeout=40\n" - "\t\t\t --param lov.stripesize=2M\n" - "\t\t--index=#N : target index (i.e. ost index within the lov)\n" - /* FIXME implement 1.6.x - "\t\t--configdev=: store configuration info\n" - "\t\t\tfor this device on an alternate device\n" - */ - "\t\t--comment=: arbitrary user string (%d bytes)\n" - "\t\t--mountfsoptions= : permanent mount options\n" - "\t\t--network=[,<...>] : network(s) to restrict this ost/mdt to\n" -#ifndef TUNEFS - "\t\t--backfstype= : backing fs type (ext3, ldiskfs)\n" - "\t\t--device-size=#N(KB) : device size for loop devices\n" - "\t\t--mkfsoptions= : format options\n" - "\t\t--reformat: overwrite an existing disk\n" - "\t\t--stripe-count-hint=#N : used for optimizing MDT inode size\n" - "\t\t--iam-dir: make use of IAM directory format on backfs, incompatible with ext3.\n" +#ifdef HAVE_LDISKFS_OSD +#define FSLIST_LDISKFS "ldiskfs" +#define HAVE_FSLIST #else - "\t\t--erase-params : erase all old parameter settings\n" - "\t\t--nomgs: turn off MGS service on this MDT\n" - "\t\t--writeconf: erase all config logs for this fs.\n" -#endif - "\t\t--dryrun: just report what we would do; " - "don't write to disk\n" - "\t\t--verbose : e.g. show mkfs progress\n" - "\t\t--quiet\n", - (int)sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); - return; -} - -#define vprint if (verbose > 0) printf -#define verrprint if (verbose >= 0) printf - -/*================ utility functions =====================*/ - -char *strscat(char *dst, char *src, int buflen) { - dst[buflen - 1] = 0; - if (strlen(dst) + strlen(src) >= buflen) { - fprintf(stderr, "string buffer overflow (max %d): '%s' + '%s'" - "\n", buflen, dst, src); - exit(EOVERFLOW); - } - return strcat(dst, src); - -} - -char *strscpy(char *dst, char *src, int buflen) { - dst[0] = 0; - return strscat(dst, src, buflen); -} - -inline unsigned int -dev_major (unsigned long long int __dev) -{ - return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff); -} - -inline unsigned int -dev_minor (unsigned long long int __dev) -{ - return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff); -} - -int get_os_version() -{ - static int version = 0; - - if (!version) { - int fd; - char release[4] = ""; - - fd = open("/proc/sys/kernel/osrelease", O_RDONLY); - if (fd < 0) - fprintf(stderr, "%s: Warning: Can't resolve kernel " - "version, assuming 2.6\n", progname); - else { - if (read(fd, release, 4) < 0) { - fprintf(stderr, "reading from /proc/sys/kernel" - "/osrelease: %s\n", strerror(errno)); - close(fd); - exit(-1); - } - close(fd); - } - if (strncmp(release, "2.4.", 4) == 0) - version = 24; - else - version = 26; - } - return version; -} - -static int check_mtab_entry(char *spec) -{ - FILE *fp; - struct mntent *mnt; - - fp = setmntent(MOUNTED, "r"); - if (fp == NULL) - return(0); - - while ((mnt = getmntent(fp)) != NULL) { - if (strcmp(mnt->mnt_fsname, spec) == 0) { - endmntent(fp); - fprintf(stderr, "%s: according to %s %s is " - "already mounted on %s\n", - progname, MOUNTED, spec, mnt->mnt_dir); - return(EEXIST); - } - } - endmntent(fp); - - return(0); -} - -/*============ disk dev functions ===================*/ - -/* Setup a file in the first unused loop_device */ -int loop_setup(struct mkfs_opts *mop) -{ - char loop_base[20]; - char l_device[64]; - int i, ret = 0; - - /* Figure out the loop device names */ - if (!access("/dev/loop0", F_OK | R_OK)) { - strcpy(loop_base, "/dev/loop\0"); - } else if (!access("/dev/loop/0", F_OK | R_OK)) { - strcpy(loop_base, "/dev/loop/\0"); - } else { - fprintf(stderr, "%s: can't access loop devices\n", progname); - return EACCES; - } - - /* Find unused loop device */ - for (i = 0; i < MAX_LOOP_DEVICES; i++) { - char cmd[PATH_MAX]; - int cmdsz = sizeof(cmd); - - sprintf(l_device, "%s%d", loop_base, i); - if (access(l_device, F_OK | R_OK)) - break; - snprintf(cmd, cmdsz, "losetup %s > /dev/null 2>&1", l_device); - ret = system(cmd); - - /* losetup gets 1 (ret=256) for non-set-up device */ - if (ret) { - /* Set up a loopback device to our file */ - snprintf(cmd, cmdsz, "losetup %s %s", l_device, - mop->mo_device); - ret = run_command(cmd, cmdsz); - if (ret == 256) - /* someone else picked up this loop device - * behind our back */ - continue; - if (ret) { - fprintf(stderr, "%s: error %d on losetup: %s\n", - progname, ret, strerror(ret)); - return ret; - } - strscpy(mop->mo_loopdev, l_device, - sizeof(mop->mo_loopdev)); - return ret; - } - } - - fprintf(stderr, "%s: out of loop devices!\n", progname); - return EMFILE; -} - -int loop_cleanup(struct mkfs_opts *mop) -{ - char cmd[150]; - int ret = 1; - if ((mop->mo_flags & MO_IS_LOOP) && *mop->mo_loopdev) { - sprintf(cmd, "losetup -d %s", mop->mo_loopdev); - ret = run_command(cmd, sizeof(cmd)); - } - return ret; -} - -/* Determine if a device is a block device (as opposed to a file) */ -int is_block(char* devname) -{ - struct stat st; - int ret = 0; - - ret = access(devname, F_OK); - if (ret != 0) - return 0; - ret = stat(devname, &st); - if (ret != 0) { - fprintf(stderr, "%s: cannot stat %s\n", progname, devname); - return -1; - } - return S_ISBLK(st.st_mode); -} - -__u64 get_device_size(char* device) -{ - int ret, fd; - __u64 size = 0; - - fd = open(device, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: cannot open %s: %s\n", - progname, device, strerror(errno)); - return 0; - } - -#ifdef BLKGETSIZE64 - /* size in bytes. bz5831 */ - ret = ioctl(fd, BLKGETSIZE64, (void*)&size); + #define FSLIST_LDISKFS "" +#endif /* HAVE_LDISKFS_OSD */ +#ifdef HAVE_ZFS_OSD + #ifdef HAVE_FSLIST + #define FSLIST_ZFS "|zfs" + #else + #define FSLIST_ZFS "zfs" + #define HAVE_FSLIST + #endif #else - { - __u32 lsize = 0; - /* size in blocks */ - ret = ioctl(fd, BLKGETSIZE, (void*)&lsize); - size = (__u64)lsize * 512; - } -#endif - close(fd); - if (ret < 0) { - fprintf(stderr, "%s: size ioctl failed: %s\n", - progname, strerror(errno)); - return 0; - } + #define FSLIST_ZFS "" +#endif /* HAVE_ZFS_OSD */ - vprint("device size = "LPU64"MB\n", size >> 20); - /* return value in KB */ - return size >> 10; -} - -int loop_format(struct mkfs_opts *mop) -{ - int ret = 0; - - if (mop->mo_device_sz == 0) { - fatal(); - fprintf(stderr, "loop device requires a --device-size= " - "param\n"); - return EINVAL; - } - - ret = creat(mop->mo_device, S_IRUSR|S_IWUSR); - if (ret < 0) { - ret = errno; - fprintf(stderr, "%s: Unable to create backing store: %d\n", - progname, ret); - } else { - close(ret); - } - - ret = truncate(mop->mo_device, mop->mo_device_sz * 1024); - if (ret != 0) { - ret = errno; - fprintf(stderr, "%s: Unable to truncate backing store: %d\n", - progname, ret); - } - - return ret; -} - -/* Display the need for the latest e2fsprogs to be installed. make_backfs - * indicates if the caller is make_lustre_backfs() or not. */ -static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) -{ - static int msg_displayed; - - if (msg_displayed) { - fprintf(stderr, "WARNING: %s does not support %s " - "feature.\n\n", E2FSPROGS, feature); - return; - } - - msg_displayed++; - - fprintf(stderr, "WARNING: The %s package currently installed on " - "your system does not support \"%s\" feature.\n", - E2FSPROGS, feature); -#if !(HAVE_LDISKFSPROGS) - fprintf(stderr, "Please install the latest version of e2fsprogs from\n" - "http://downloads.lustre.org/public/tools/e2fsprogs/\n" - "to enable this feature.\n"); +#ifndef HAVE_FSLIST + #error "no backing OSD types (ldiskfs or ZFS) are configured" #endif - if (make_backfs) - fprintf(stderr, "Feature will not be enabled until %s" - "is updated and '%s -O %s %%{device}' " - "is run.\n\n", E2FSPROGS, TUNE2FS, feature); -} - -/* Check whether the file exists in the device */ -static int file_in_dev(char *file_name, char *dev_name) -{ - FILE *fp; - char debugfs_cmd[256]; - unsigned int inode_num; - int i; - - /* Construct debugfs command line. */ - snprintf(debugfs_cmd, sizeof(debugfs_cmd), - "%s -c -R 'stat %s' '%s' 2>&1 | egrep '(Inode|unsupported)'", - DEBUGFS, file_name, dev_name); - - fp = popen(debugfs_cmd, "r"); - if (!fp) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return 0; - } - if (fscanf(fp, "Inode: %u", &inode_num) == 1) { /* exist */ - pclose(fp); - return 1; - } - i = fread(debugfs_cmd, 1, sizeof(debugfs_cmd), fp); - if (i) { - debugfs_cmd[i] = 0; - fprintf(stderr, "%s", debugfs_cmd); - if (strstr(debugfs_cmd, "unsupported feature")) { - disp_old_e2fsprogs_msg("an unknown", 0); - } - pclose(fp); - return -1; - } - pclose(fp); - return 0; -} +#define FSLIST FSLIST_LDISKFS FSLIST_ZFS -/* Check whether the device has already been used with lustre */ -static int is_lustre_target(struct mkfs_opts *mop) -{ - int rc; - - vprint("checking for existing Lustre data: "); - - if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))) { - vprint("found %s\n", - (rc == 1) ? MOUNT_DATA_FILE : "extents"); - /* in the -1 case, 'extents' means this really IS a lustre - target */ - return rc; - } - - if ((rc = file_in_dev(LAST_RCVD, mop->mo_device))) { - vprint("found %s\n", LAST_RCVD); - return rc; - } - - vprint("not found\n"); - return 0; /* The device is not a lustre target. */ -} - -/* Check if a certain feature is supported by e2fsprogs. - * Firstly we try to use "debugfs supported_features" command to check if - * the feature is supported. If this fails we try to set this feature with - * mke2fs to check for its support. */ -static int is_e2fsprogs_feature_supp(const char *feature) -{ - FILE *fp; - char cmd[PATH_MAX]; - char imgname[] = "/tmp/test-img-XXXXXX"; - int fd = -1; - int ret = 0; - - snprintf(cmd, sizeof(cmd), "%s -c -R \"supported_features %s\" 2>&1", - DEBUGFS, feature); - - /* Using popen() instead of run_command() since debugfs does not return - * proper error code if command is not supported */ - fp = popen(cmd, "r"); - if (!fp) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return 0; - } - ret = fread(cmd, 1, sizeof(cmd), fp); - if (ret > 0) { - if (strstr(cmd, feature) && !(strstr(cmd, "Unknown"))) - return 0; - } - - if ((fd = mkstemp(imgname)) < 0) - return -1; - - snprintf(cmd, sizeof(cmd), "%s -F -O %s %s 100 >/dev/null 2>&1", - MKE2FS, feature, imgname); - /* run_command() displays the output of mke2fs when it fails for - * some feature, so use system() directly */ - ret = system(cmd); - if (fd >= 0) - remove(imgname); - - return ret; -} - -static void enable_default_backfs_features(struct mkfs_opts *mop) -{ - struct utsname uts; - int ret; - - if (IS_OST(&mop->mo_ldd)) - strscat(mop->mo_mkfsopts, " -O dir_index,extents", - sizeof(mop->mo_mkfsopts)); - else if (IS_MDT(&mop->mo_ldd)) - strscat(mop->mo_mkfsopts, " -O dir_index,dirdata", - sizeof(mop->mo_mkfsopts)); - else - strscat(mop->mo_mkfsopts, " -O dir_index", - sizeof(mop->mo_mkfsopts)); - - /* Upstream e2fsprogs called our uninit_groups feature uninit_bg, - * check for both of them when testing e2fsprogs features. */ - if (is_e2fsprogs_feature_supp("uninit_bg") == 0) - strscat(mop->mo_mkfsopts, ",uninit_bg", - sizeof(mop->mo_mkfsopts)); - else if (is_e2fsprogs_feature_supp("uninit_groups") == 0) - strscat(mop->mo_mkfsopts, ",uninit_groups", - sizeof(mop->mo_mkfsopts)); - else - disp_old_e2fsprogs_msg("uninit_bg", 1); - - ret = uname(&uts); - if (ret) - return; - - /* Multiple mount protection is enabled only if failover node is - * specified and if kernel version is higher than 2.6.9 */ - if (failover) { - if (is_e2fsprogs_feature_supp("mmp") == 0) - strscat(mop->mo_mkfsopts, ",mmp", - sizeof(mop->mo_mkfsopts)); - else - disp_old_e2fsprogs_msg("mmp", 1); - } -} -/* Build fs according to type */ -int make_lustre_backfs(struct mkfs_opts *mop) +void usage(FILE *out) { - __u64 device_sz = mop->mo_device_sz, block_count = 0; - char mkfs_cmd[PATH_MAX]; - char buf[64]; - char *dev; - int ret = 0; - - if (!(mop->mo_flags & MO_IS_LOOP)) { - mop->mo_device_sz = get_device_size(mop->mo_device); - - if (mop->mo_device_sz == 0) - return ENODEV; - - /* Compare to real size */ - if (device_sz == 0 || device_sz > mop->mo_device_sz) - device_sz = mop->mo_device_sz; - else - mop->mo_device_sz = device_sz; - } - - if (mop->mo_device_sz != 0) { - if (mop->mo_device_sz < 8096){ - fprintf(stderr, "%s: size of filesystem must be larger " - "than 8MB, but is set to %lldKB\n", - progname, (long long)mop->mo_device_sz); - return EINVAL; - } - block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10); - /* If the LUN size is just over 2^32 blocks, limit the - * filesystem size to 2^32-1 blocks to avoid problems with - * ldiskfs/mkfs not handling this size. Bug 22906 */ - if (block_count > 0xffffffffULL && block_count < 0x100002000ULL) - block_count = 0xffffffffULL; - } - - if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) || - (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) || - (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS2)) { - /* Journal size in MB */ - if (strstr(mop->mo_mkfsopts, "-J") == NULL) { - /* Choose our own default journal size */ - long journal_sz = 0, max_sz; - if (device_sz > 1024 * 1024) /* 1GB */ - journal_sz = (device_sz / 102400) * 4; - /* cap journal size at 1GB */ - if (journal_sz > 1024L) - journal_sz = 1024L; - /* man mkfs.ext3 */ - max_sz = (102400 * L_BLOCK_SIZE) >> 20; /* 400MB */ - if (journal_sz > max_sz) - journal_sz = max_sz; - if (journal_sz) { - sprintf(buf, " -J size=%ld", journal_sz); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - } - - /* Bytes_per_inode: disk size / num inodes */ - if (strstr(mop->mo_mkfsopts, "-i") == NULL) { - long bytes_per_inode = 0; - - if (IS_MDT(&mop->mo_ldd)) - bytes_per_inode = 4096; - - /* Allocate fewer inodes on large OST devices. Most - filesystems can be much more aggressive than even - this. */ - if ((IS_OST(&mop->mo_ldd) && (device_sz > 100000000))) - bytes_per_inode = 16384; /* > 100 Gb device */ - - - if (bytes_per_inode > 0) { - sprintf(buf, " -i %ld", bytes_per_inode); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - } - - /* Inode size (for extended attributes). The LOV EA size is - * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data), - * and we want some margin above that for ACLs, other EAs... */ - if (strstr(mop->mo_mkfsopts, "-I") == NULL) { - long inode_size = 0; - if (IS_MDT(&mop->mo_ldd)) { - if (mop->mo_stripe_count > 72) - inode_size = 512; /* bz 7241 */ - /* cray stripes across all osts (>60) */ - else if (mop->mo_stripe_count > 32) - inode_size = 2048; - else if (mop->mo_stripe_count > 10) - inode_size = 1024; - else - inode_size = 512; - } else if (IS_OST(&mop->mo_ldd)) { - /* now as we store fids in EA on OST we need - to make inode bigger */ - inode_size = 256; - } - - if (inode_size > 0) { - sprintf(buf, " -I %ld", inode_size); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - } - - if (verbose < 2) { - strscat(mop->mo_mkfsopts, " -q", - sizeof(mop->mo_mkfsopts)); - } - - if (strstr(mop->mo_mkfsopts, "-O") == NULL) - enable_default_backfs_features(mop); - - /* Allow reformat of full devices (as opposed to - partitions.) We already checked for mounted dev. */ - strscat(mop->mo_mkfsopts, " -F", sizeof(mop->mo_mkfsopts)); - - snprintf(mkfs_cmd, sizeof(mkfs_cmd), - "%s -j -b %d -L %s ", MKE2FS, L_BLOCK_SIZE, - mop->mo_ldd.ldd_svname); - } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) { - long journal_sz = 0; /* FIXME default journal size */ - if (journal_sz > 0) { - sprintf(buf, " --journal_size %ld", journal_sz); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - snprintf(mkfs_cmd, sizeof(mkfs_cmd), "mkreiserfs -ff "); - } else { - fprintf(stderr,"%s: unsupported fs type: %d (%s)\n", - progname, mop->mo_ldd.ldd_mount_type, - MT_STR(&mop->mo_ldd)); - return EINVAL; - } - - /* For loop device format the dev, not the filename */ - dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) - dev = mop->mo_loopdev; - - vprint("formatting backing filesystem %s on %s\n", - MT_STR(&mop->mo_ldd), dev); - vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname); - vprint("\t4k blocks "LPU64"\n", block_count); - vprint("\toptions %s\n", mop->mo_mkfsopts); - - /* mkfs_cmd's trailing space is important! */ - strscat(mkfs_cmd, mop->mo_mkfsopts, sizeof(mkfs_cmd)); - strscat(mkfs_cmd, " ", sizeof(mkfs_cmd)); - strscat(mkfs_cmd, dev, sizeof(mkfs_cmd)); - if (block_count != 0) { - sprintf(buf, " "LPU64, block_count); - strscat(mkfs_cmd, buf, sizeof(mkfs_cmd)); - } - - vprint("mkfs_cmd = %s\n", mkfs_cmd); - ret = run_command(mkfs_cmd, sizeof(mkfs_cmd)); - if (ret) { - fatal(); - fprintf(stderr, "Unable to build fs %s (%d)\n", dev, ret); - } - return ret; + fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); + fprintf(out, "usage: %s [--backfstype="FSLIST"] " + "--fsname=\n" + "\t--index= [options] \n", progname); +#ifdef HAVE_ZFS_OSD + fprintf(out, "usage: %s --backfstype=zfs " + "--fsname= [options]\n" + "\t/\n" + "\t[[] [ ...] [vdev type>] ...]\n", + progname); +#endif + fprintf(out, + "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" +#ifdef HAVE_ZFS_OSD + "\t: name of ZFS pool where target is created " + "(e.g. tank)\n" + "\t: name of new dataset, must be unique within " + "pool (e.g. ost1)\n" + "\t: type of vdev (mirror, raidz, raidz2, spare, " + "cache, log)\n" +#endif + "\n" + "\ttarget types:\n" + "\t\t--mgs: configuration management service\n" + "\t\t--mdt: metadata storage, mutually exclusive with ost\n" + "\t\t--ost: object storage, mutually exclusive with mdt, mgs\n" + "\toptions (in order of popularity):\n" + "\t\t--index=#N: numerical target index (0..N)\n" + "\t\t\trequired for all targets other than the MGS\n" + "\t\t--fsname=<8_char_filesystem_name>: fs targets belong to\n" + "\t\t\trequired for all targets other than MGS\n" + "\t\t--mgsnode=[,<...>]: NID(s) of remote MGS\n" + "\t\t\trequired for all targets other than MGS\n" + "\t\t--mountfsoptions=: permanent mount options\n" + "\t\t--failnode=[,<...>]: NID(s) of backup failover node\n" + "\t\t\tmutually exclusive with --servicenode\n" + "\t\t--servicenode=[,<...>]: NID(s) of service partners\n" + "\t\t\ttreat nodes as equal service node, mutually exclusive " + "with --failnode\n" + "\t\t--param =: set a permanent parameter\n" + "\t\t\te.g. --param sys.timeout=40\n" + "\t\t\t --param lov.stripesize=2M\n" + "\t\t--network=[,<...>]: restrict OST/MDT to network(s)\n" +#ifndef TUNEFS + "\t\t--backfstype=: backing fs type (ext3, ldiskfs)\n" + "\t\t--device-size=#N(KB): device size for loop devices\n" + "\t\t--mkfsoptions=: format options\n" + "\t\t--reformat: overwrite an existing disk\n" + "\t\t--stripe-count-hint=#N: for optimizing MDT inode size\n" +#else + "\t\t--erase-params: erase all old parameter settings\n" + "\t\t--nomgs: turn off MGS service on this MDT\n" + "\t\t--writeconf: erase all config logs for this fs.\n" + "\t\t--quota: enable space accounting on old 2.x device.\n" +#endif + "\t\t--comment=: arbitrary string (%d bytes)\n" + "\t\t--dryrun: report what we would do; don't write to disk\n" + "\t\t--verbose: e.g. show mkfs progress\n" + "\t\t--quiet\n", + (int)sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); + return; } /* ==================== Lustre config functions =============*/ @@ -740,7 +191,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); - printf(" (%s%s%s%s%s%s%s%s%s)\n", + printf(" (%s%s%s%s%s%s%s%s%s%s)\n", IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", @@ -749,6 +200,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_UPDATE ? "update ":"", ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"", ldd->ldd_flags & LDD_F_IAM_DIR ? "IAM_dir_format ":"", + ldd->ldd_flags & LDD_F_NO_PRIMNODE? "no_primnode ":"", ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); printf("Parameters:%s\n", ldd->ldd_params); @@ -757,362 +209,20 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("\n"); } -static int touch_file(char *filename) -{ - int fd; - - if (filename == NULL) { - return 1; - } - - fd = open(filename, O_CREAT | O_TRUNC, 0600); - if (fd < 0) { - return 1; - } else { - close(fd); - return 0; - } -} - -/* keep it less than LL_FID_NAMELEN */ -#define DUMMY_FILE_NAME_LEN 25 -#define EXT3_DIRENT_SIZE DUMMY_FILE_NAME_LEN - -/* Need to add these many entries to this directory to make HTREE dir. */ -#define MIN_ENTRIES_REQ_FOR_HTREE ((L_BLOCK_SIZE / EXT3_DIRENT_SIZE)) - -static int add_dummy_files(char *dir) -{ - char fpname[PATH_MAX]; - int i; - int rc; - - for (i = 0; i < MIN_ENTRIES_REQ_FOR_HTREE; i++) { - snprintf(fpname, PATH_MAX, "%s/%0*d", dir, - DUMMY_FILE_NAME_LEN, i); - - rc = touch_file(fpname); - if (rc && rc != -EEXIST) { - fprintf(stderr, - "%s: Can't create dummy file %s: %s\n", - progname, fpname , strerror(errno)); - return rc; - } - } - return 0; -} - -static int __l_mkdir(char * filepnm, int mode , struct mkfs_opts *mop) -{ - int ret; - - ret = mkdir(filepnm, mode); - if (ret && ret != -EEXIST) - return ret; - - /* IAM mode supports ext3 directories of HTREE type only. So add dummy - * entries to new directory to create htree type of container for - * this directory. */ - if (mop->mo_ldd.ldd_flags & LDD_F_IAM_DIR) - return add_dummy_files(filepnm); - return 0; -} - -/* Write the server config files */ -int write_local_files(struct mkfs_opts *mop) -{ - char mntpt[] = "/tmp/mntXXXXXX"; - char filepnm[128]; - char *dev; - FILE *filep; - int ret = 0; - size_t num; - - /* Mount this device temporarily in order to write these files */ - if (!mkdtemp(mntpt)) { - fprintf(stderr, "%s: Can't create temp mount point %s: %s\n", - progname, mntpt, strerror(errno)); - return errno; - } - - dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) - dev = mop->mo_loopdev; - - ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, - mop->mo_ldd.ldd_mount_opts); - if (ret) { - fprintf(stderr, "%s: Unable to mount %s: %s\n", - progname, dev, strerror(errno)); - ret = errno; - if (errno == ENODEV) { - fprintf(stderr, "Is the %s module available?\n", - MT_STR(&mop->mo_ldd)); - } - goto out_rmdir; - } - - /* Set up initial directories */ - sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR); - ret = __l_mkdir(filepnm, 0777, mop); - if ((ret != 0) && (errno != EEXIST)) { - fprintf(stderr, "%s: Can't make configs dir %s (%s)\n", - progname, filepnm, strerror(errno)); - goto out_umnt; - } else if (errno == EEXIST) { - ret = 0; - } - - /* Save the persistent mount data into a file. Lustre must pre-read - this file to get the real mount options. */ - vprint("Writing %s\n", MOUNT_DATA_FILE); - sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE); - filep = fopen(filepnm, "w"); - if (!filep) { - fprintf(stderr, "%s: Unable to create %s file: %s\n", - progname, filepnm, strerror(errno)); - goto out_umnt; - } - num = fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); - if (num < 1 && ferror(filep)) { - fprintf(stderr, "%s: Unable to write to file (%s): %s\n", - progname, filepnm, strerror(errno)); - goto out_umnt; - } - fclose(filep); - /* COMPAT_146 */ -#ifdef TUNEFS - /* Check for upgrade */ - if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | SVTYPE_MGS)) - == (LDD_F_UPGRADE14 | SVTYPE_MGS)) { - char cmd[128]; - char *term; - int cmdsz = sizeof(cmd); - vprint("Copying old logs\n"); - - /* Copy the old client log to fsname-client */ - sprintf(filepnm, "%s/%s/%s-client", - mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname); - snprintf(cmd, cmdsz, "cp %s/%s/client %s", mntpt, MDT_LOGS_DIR, - filepnm); - ret = run_command(cmd, cmdsz); - if (ret) { - fprintf(stderr, "%s: Can't copy 1.4 config %s/client " - "(%d)\n", progname, MDT_LOGS_DIR, ret); - fprintf(stderr, "mount -t ldiskfs %s somewhere, " - "find the client log for fs %s and " - "copy it manually into %s/%s-client, " - "then umount.\n", - mop->mo_device, - mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, - mop->mo_ldd.ldd_fsname); - goto out_umnt; - } - - /* We need to use the old mdt log because otherwise mdt won't - have complete lov if old clients connect before all - servers upgrade. */ - /* Copy the old mdt log to fsname-MDT0000 (get old - name from mdt_UUID) */ - ret = 1; - strscpy(filepnm, (char *)mop->mo_ldd.ldd_uuid, sizeof(filepnm)); - term = strstr(filepnm, "_UUID"); - if (term) { - *term = '\0'; - snprintf(cmd, cmdsz, "cp %s/%s/%s %s/%s/%s", - mntpt, MDT_LOGS_DIR, filepnm, - mntpt, MOUNT_CONFIGS_DIR, - mop->mo_ldd.ldd_svname); - ret = run_command(cmd, cmdsz); - } - if (ret) { - fprintf(stderr, "%s: Can't copy 1.4 config %s/%s " - "(%d)\n", progname, MDT_LOGS_DIR, filepnm, ret); - fprintf(stderr, "mount -t ext3 %s somewhere, " - "find the MDT log for fs %s and " - "copy it manually into %s/%s, " - "then umount.\n", - mop->mo_device, - mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, - mop->mo_ldd.ldd_svname); - goto out_umnt; - } - } -#endif - /* end COMPAT_146 */ - -out_umnt: - umount(mntpt); -out_rmdir: - rmdir(mntpt); - return ret; -} - -int read_local_files(struct mkfs_opts *mop) -{ - char tmpdir[] = "/tmp/dirXXXXXX"; - char cmd[PATH_MAX]; - char filepnm[128]; - char *dev; - FILE *filep; - int ret = 0; - int cmdsz = sizeof(cmd); - - /* Make a temporary directory to hold Lustre data files. */ - if (!mkdtemp(tmpdir)) { - fprintf(stderr, "%s: Can't create temporary directory %s: %s\n", - progname, tmpdir, strerror(errno)); - return errno; - } - - dev = mop->mo_device; - - /* TODO: it's worth observing the get_mountdata() function that is - in mount_utils.c for getting the mountdata out of the - filesystem */ - - /* Construct debugfs command line. */ - snprintf(cmd, cmdsz, "%s -c -R 'dump /%s %s/mountdata' '%s'", - DEBUGFS, MOUNT_DATA_FILE, tmpdir, dev); - - ret = run_command(cmd, cmdsz); - if (ret) - verrprint("%s: Unable to dump %s dir (%d)\n", - progname, MOUNT_CONFIGS_DIR, ret); - - sprintf(filepnm, "%s/mountdata", tmpdir); - filep = fopen(filepnm, "r"); - if (filep) { - size_t num_read; - vprint("Reading %s\n", MOUNT_DATA_FILE); - num_read = fread(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); - if (num_read < 1 && ferror(filep)) { - fprintf(stderr, "%s: Unable to read from file (%s): %s\n", - progname, filepnm, strerror(errno)); - goto out_close; - } - } else { - /* COMPAT_146 */ - /* Try to read pre-1.6 config from last_rcvd */ - struct lr_server_data lsd; - verrprint("%s: Unable to read %d.%d config %s.\n", - progname, LUSTRE_MAJOR, LUSTRE_MINOR, filepnm); - - verrprint("Trying 1.4 config from last_rcvd\n"); - sprintf(filepnm, "%s/%s", tmpdir, LAST_RCVD); - - /* Construct debugfs command line. */ - snprintf(cmd, cmdsz, "%s -c -R 'dump /%s %s' %s", - DEBUGFS, LAST_RCVD, filepnm, dev); - - ret = run_command(cmd, cmdsz); - if (ret) { - fprintf(stderr, "%s: Unable to dump %s file (%d)\n", - progname, LAST_RCVD, ret); - goto out_rmdir; - } - - filep = fopen(filepnm, "r"); - if (!filep) { - fprintf(stderr, "%s: Unable to open %s: %s\n", - progname, filepnm, strerror(errno)); - ret = errno; - verrprint("Contents of %s:\n", tmpdir); - verbose+=2; - snprintf(cmd, cmdsz, "ls -l %s/", tmpdir); - run_command(cmd, cmdsz); - verrprint("Contents of disk:\n"); - snprintf(cmd, cmdsz, "%s -c -R 'ls -l /' %s", - DEBUGFS, dev); - run_command(cmd, cmdsz); - - goto out_rmdir; - } - vprint("Reading %s\n", LAST_RCVD); - ret = fread(&lsd, 1, sizeof(lsd), filep); - if (ret < sizeof(lsd)) { - fprintf(stderr, "%s: Short read (%d of %d)\n", - progname, ret, (int)sizeof(lsd)); - ret = ferror(filep); - if (ret) - goto out_close; - } - vprint("Feature compat=%x, incompat=%x\n", - lsd.lsd_feature_compat, lsd.lsd_feature_incompat); - - if ((lsd.lsd_feature_compat & OBD_COMPAT_OST) || - (lsd.lsd_feature_incompat & OBD_INCOMPAT_OST)) { - mop->mo_ldd.ldd_flags = SVTYPE_OST; - mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index; - } else if ((lsd.lsd_feature_compat & OBD_COMPAT_MDT) || - (lsd.lsd_feature_incompat & OBD_INCOMPAT_MDT)) { - /* We must co-locate so mgs can see old logs. - If user doesn't want this, they can copy the old - logs manually and re-tunefs. */ - mop->mo_ldd.ldd_flags = - SVTYPE_MDT | SVTYPE_MGS; - mop->mo_ldd.ldd_svindex = lsd.lsd_mdt_index; - } else { - /* If neither is set, we're pre-1.4.6, make a guess. */ - /* Construct debugfs command line. */ - snprintf(cmd, cmdsz, "%s -c -R 'rdump /%s %s' %s", - DEBUGFS, MDT_LOGS_DIR, tmpdir, dev); - run_command(cmd, cmdsz); - - sprintf(filepnm, "%s/%s", tmpdir, MDT_LOGS_DIR); - if (lsd.lsd_ost_index > 0) { - mop->mo_ldd.ldd_flags = SVTYPE_OST; - mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index; - } else { - /* If there's a LOGS dir, it's an MDT */ - if ((ret = access(filepnm, F_OK)) == 0) { - mop->mo_ldd.ldd_flags = - SVTYPE_MDT | - SVTYPE_MGS; - /* Old MDT's are always index 0 - (pre CMD) */ - mop->mo_ldd.ldd_svindex = 0; - } else { - /* The index may not be correct */ - mop->mo_ldd.ldd_flags = - SVTYPE_OST | LDD_F_NEED_INDEX; - verrprint("OST with unknown index\n"); - } - } - } - - ret = 0; - memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid, - sizeof(mop->mo_ldd.ldd_uuid)); - mop->mo_ldd.ldd_flags |= LDD_F_UPGRADE14; - } - /* end COMPAT_146 */ -out_close: - fclose(filep); - -out_rmdir: - snprintf(cmd, cmdsz, "rm -rf %s", tmpdir); - run_command(cmd, cmdsz); - if (ret) - verrprint("Failed to read old data (%d)\n", ret); - return ret; -} - - void set_defaults(struct mkfs_opts *mop) { - mop->mo_ldd.ldd_magic = LDD_MAGIC; - mop->mo_ldd.ldd_config_ver = 1; - mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; - mop->mo_mgs_failnodes = 0; - strcpy(mop->mo_ldd.ldd_fsname, "lustre"); - if (get_os_version() == 24) - mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3; - else - mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; - - mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; - mop->mo_stripe_count = 1; + mop->mo_ldd.ldd_magic = LDD_MAGIC; + mop->mo_ldd.ldd_config_ver = 1; + mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; +#ifdef HAVE_LDISKFS_OSD + mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; +#else + mop->mo_ldd.ldd_mount_type = LDD_MT_ZFS; +#endif + mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; + mop->mo_mgs_failnodes = 0; + mop->mo_stripe_count = 1; + mop->mo_pool_vdevs = NULL; } static inline void badopt(const char *opt, char *type) @@ -1122,24 +232,6 @@ static inline void badopt(const char *opt, char *type) usage(stderr); } -static int add_param(char *buf, char *key, char *val) -{ - int end = sizeof(((struct lustre_disk_data *)0)->ldd_params); - int start = strlen(buf); - int keylen = 0; - - if (key) - keylen = strlen(key); - if (start + 1 + keylen + strlen(val) >= end) { - fprintf(stderr, "%s: params are too long-\n%s %s%s\n", - progname, buf, key ? key : "", val); - return 1; - } - - sprintf(buf + start, " %s%s", key ? key : "", val); - return 0; -} - /* from mount_lustre */ /* Get rid of symbolic hostnames for tcp, since kernel can't do lookups */ #define MAXNIDSTR 1024 @@ -1163,9 +255,11 @@ static char *convert_hostnames(char *s1) sep = *s2; *s2 = '\0'; nid = libcfs_str2nid(s1); + *s2 = sep; if (nid == LNET_NID_ANY) { - fprintf(stderr, "%s: Can't parse NID '%s'\n", progname, s1); + fprintf(stderr, "%s: Can't parse NID '%s'\n", + progname, s1); free(converted); return NULL; } @@ -1216,15 +310,18 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"print", 0, 0, 'n'}, {"quiet", 0, 0, 'q'}, {"reformat", 0, 0, 'r'}, + {"servicenode", 1, 0, 's'}, {"verbose", 0, 0, 'v'}, {"writeconf", 0, 0, 'w'}, {"upgrade_to_18", 0, 0, 'U'}, {"network", 1, 0, 't'}, + {"quota", 0, 0, 'Q'}, {0, 0, 0, 0} }; - char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw"; + char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrs:t:Uu:vw"; int opt; int rc, longidx; + int failnode_set = 0, servicenode_set = 0; while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { @@ -1243,6 +340,11 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } i++; } + if (i == LDD_MT_LAST) { + fprintf(stderr, "%s: invalid backend filesystem" + " type %s\n", progname, optarg); + return 1; + } break; } case 'c': @@ -1271,8 +373,19 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; break; - case 'f': { - char *nids = convert_hostnames(optarg); + case 'f': + case 's': { + char *nids; + + if ((opt == 'f' && servicenode_set) + || (opt == 's' && failnode_set)) { + fprintf(stderr, "%s: %s cannot use with --%s\n", + progname, long_opt[longidx].name, + opt == 'f' ? "servicenode" : "failnode"); + return 1; + } + + nids = convert_hostnames(optarg); if (!nids) return 1; rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, @@ -1282,11 +395,17 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; - failover = 1; + if (opt == 'f') { + failnode_set = 1; + } else { + mop->mo_ldd.ldd_flags |= LDD_F_NO_PRIMNODE; + servicenode_set = 1; + } + mop->mo_flags |= MO_FAILOVER; break; } case 'G': - mop->mo_ldd.ldd_flags |= SVTYPE_MGS; + mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGS; break; case 'h': usage(stdout); @@ -1348,19 +467,19 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, break; } case 'M': - mop->mo_ldd.ldd_flags |= SVTYPE_MDT; + mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MDT; break; case 'n': print_only++; break; case 'N': - mop->mo_ldd.ldd_flags &= ~SVTYPE_MGS; + mop->mo_ldd.ldd_flags &= ~LDD_F_SV_TYPE_MGS; break; case 'o': *mountopts = optarg; break; case 'O': - mop->mo_ldd.ldd_flags |= SVTYPE_OST; + mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_OST; break; case 'p': rc = add_param(mop->mo_ldd.ldd_params, NULL, optarg); @@ -1404,6 +523,9 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, case 'U': upgrade_to_18 = 1; break; + case 'Q': + mop->mo_flags |= MO_QUOTA; + break; default: if (opt != '?') { fatal(); @@ -1413,90 +535,24 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } }//while - /* Last arg is device */ - if (optind != argc - 1) { - fatal(); - fprintf(stderr, "Bad argument: %s\n", argv[optind]); - return EINVAL; - } - - /* single argument: */ - if (argc == 2) - ++print_only; + if (optind == argc) { + /* The user didn't specify device name */ + fatal(); + fprintf(stderr, "Not enough arguments - device name or " + "pool/dataset name not specified.\n"); + return EINVAL; + } else { + /* The device or pool/filesystem name */ + strscpy(mop->mo_device, argv[optind], sizeof(mop->mo_device)); + + /* Followed by optional vdevs */ + if (optind < argc - 1) + mop->mo_pool_vdevs = (char **) &argv[optind + 1]; + } return 0; } -/* Search for opt in mntlist, returning true if found. - */ -static int in_mntlist(char *opt, char *mntlist) -{ - char *ml, *mlp, *item, *ctx = NULL; - - if (!(ml = strdup(mntlist))) { - fprintf(stderr, "%s: out of memory\n", progname); - exit(1); - } - mlp = ml; - while ((item = strtok_r(mlp, ",", &ctx))) { - if (!strcmp(opt, item)) - break; - mlp = NULL; - } - free(ml); - return (item != NULL); -} - -/* Issue a message on stderr for every item in wanted_mountopts that is not - * present in mountopts. The justwarn boolean toggles between error and - * warning message. Return an error count. - */ -static int check_mountfsoptions(char *mountopts, char *wanted_mountopts, - int justwarn) -{ - char *ml, *mlp, *item, *ctx = NULL; - int errors = 0; - - if (!(ml = strdup(wanted_mountopts))) { - fprintf(stderr, "%s: out of memory\n", progname); - exit(1); - } - mlp = ml; - while ((item = strtok_r(mlp, ",", &ctx))) { - if (!in_mntlist(item, mountopts)) { - fprintf(stderr, "%s: %s mount option `%s' is missing\n", - progname, justwarn ? "Warning: default" - : "Error: mandatory", item); - errors++; - } - mlp = NULL; - } - free(ml); - return errors; -} - -/* Trim embedded white space, leading and trailing commas from string s. - */ -static void trim_mountfsoptions(char *s) -{ - char *p; - - for (p = s; *p; ) { - if (isspace(*p)) { - memmove(p, p + 1, strlen(p + 1) + 1); - continue; - } - p++; - } - - while (s[0] == ',') - memmove(&s[0], &s[1], strlen(&s[1]) + 1); - - p = s + strlen(s) - 1; - while (p >= s && *p == ',') - *p-- = '\0'; -} - int main(int argc, char *const argv[]) { struct mkfs_opts mop; @@ -1504,6 +560,7 @@ int main(int argc, char *const argv[]) char *mountopts = NULL; char always_mountopts[512] = ""; char default_mountopts[512] = ""; + unsigned mount_type; int ret = 0; if ((progname = strrchr(argv[0], '/')) != NULL) @@ -1522,19 +579,12 @@ int main(int argc, char *const argv[]) /* device is last arg */ strscpy(mop.mo_device, argv[argc - 1], sizeof(mop.mo_device)); - /* Are we using a loop device? */ - ret = is_block(mop.mo_device); - if (ret < 0) - goto out; - if (ret == 0) - mop.mo_flags |= MO_IS_LOOP; - #ifdef TUNEFS /* For tunefs, we must read in the old values before parsing any new ones. */ /* Check whether the disk has already been formatted by mkfs.lustre */ - ret = is_lustre_target(&mop); + ret = osd_is_lustre(mop.mo_device, &mount_type); if (ret == 0) { fatal(); fprintf(stderr, "Device %s has not been formatted with " @@ -1543,13 +593,15 @@ int main(int argc, char *const argv[]) goto out; } - ret = read_local_files(&mop); + ret = osd_read_ldd(mop.mo_device, &mop.mo_ldd); if (ret) { fatal(); fprintf(stderr, "Failed to read previous Lustre data from %s " "(%d)\n", mop.mo_device, ret); goto out; } + mop.mo_ldd.ldd_flags &= ~(LDD_F_WRITECONF | LDD_F_VIRGIN); + if (strstr(mop.mo_ldd.ldd_params, PARAM_MGSNODE)) mop.mo_mgs_failnodes++; @@ -1557,6 +609,10 @@ int main(int argc, char *const argv[]) print_ldd("Read previous values", &(mop.mo_ldd)); #endif + ret = osd_init(); + if (ret) + return ret; + ret = parse_opts(argc, argv, &mop, &mountopts); if (ret) goto out; @@ -1577,6 +633,10 @@ int main(int argc, char *const argv[]) goto out; } + /* Stand alone MGS doesn't need a index */ + if (!IS_MDT(ldd) && IS_MGS(ldd)) + mop.mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX; + if ((mop.mo_ldd.ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPGRADE14)) == (LDD_F_NEED_INDEX | LDD_F_UPGRADE14)) { fatal(); @@ -1585,6 +645,12 @@ int main(int argc, char *const argv[]) ret = EINVAL; goto out; } + + if (IS_OST(ldd) && (mop.mo_ldd.ldd_flags & LDD_F_NEED_INDEX)) + fprintf(stderr, "warning: %s: for Lustre 2.4 and later, the " + "target index must be specified with --index\n", + mop.mo_device); + #if 0 /* * Comment out these 2 checks temporarily, since for multi-MDSes @@ -1593,7 +659,7 @@ int main(int argc, char *const argv[]) if (IS_MDT(ldd) && !IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) { verrprint("No management node specified, adding MGS to this " "MDT\n"); - ldd->ldd_flags |= SVTYPE_MGS; + ldd->ldd_flags |= LDD_F_SV_TYPE_MGS; } if (!IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) { fatal(); @@ -1605,46 +671,22 @@ int main(int argc, char *const argv[]) goto out; } #endif + if ((IS_MDT(ldd) || IS_OST(ldd)) && mop.mo_ldd.ldd_fsname[0] == '\0') { + fatal(); + fprintf(stderr, "Must specify --fsname for MDT/OST device\n"); + ret = EINVAL; + goto out; + } /* These are the permanent mount options (always included) */ - switch (ldd->ldd_mount_type) { - case LDD_MT_EXT3: - case LDD_MT_LDISKFS: - case LDD_MT_LDISKFS2: { - strscat(default_mountopts, ",errors=remount-ro", - sizeof(default_mountopts)); - if (IS_MDT(ldd) || IS_MGS(ldd)) - strscat(always_mountopts, ",iopen_nopriv,user_xattr", - sizeof(always_mountopts)); - if ((get_os_version() == 24) && IS_OST(ldd)) - strscat(always_mountopts, ",asyncdel", - sizeof(always_mountopts)); - /* NB: Files created while extents are enabled cannot be read - if mounted with a kernel that doesn't include the Lustre ldiskfs - patches! */ - if (IS_OST(ldd) && - (ldd->ldd_mount_type == LDD_MT_LDISKFS || - ldd->ldd_mount_type == LDD_MT_LDISKFS2)) { - strscat(default_mountopts, ",extents,mballoc", - sizeof(default_mountopts)); - } - break; - } - case LDD_MT_SMFS: { - mop.mo_flags |= MO_IS_LOOP; - sprintf(always_mountopts, ",type=ext3,dev=%s", - mop.mo_device); - break; - } - default: { - fatal(); - fprintf(stderr, "unknown fs type %d '%s'\n", - ldd->ldd_mount_type, - MT_STR(ldd)); - ret = EINVAL; - goto out; - } - } + ret = osd_prepare_lustre(&mop, + default_mountopts, sizeof(default_mountopts), + always_mountopts, sizeof(always_mountopts)); + if (ret) { + fatal(); + fprintf(stderr, "unable to prepare backend (%d)\n", ret); + goto out; + } if (mountopts) { trim_mountfsoptions(mountopts); @@ -1677,8 +719,8 @@ int main(int argc, char *const argv[]) goto out; } - if (check_mtab_entry(mop.mo_device)) - return(EEXIST); + if (check_mtab_entry(mop.mo_device, mop.mo_device, NULL, NULL)) + return(EEXIST); /* Create the loopback file */ if (mop.mo_flags & MO_IS_LOOP) { @@ -1687,8 +729,11 @@ int main(int argc, char *const argv[]) ret = errno; #ifndef TUNEFS /* mkfs.lustre */ /* Reformat the loopback file */ - if (ret || (mop.mo_flags & MO_FORCEFORMAT)) + if (ret || (mop.mo_flags & MO_FORCEFORMAT)) { ret = loop_format(&mop); + if (ret) + goto out; + } #endif if (ret == 0) ret = loop_setup(&mop); @@ -1703,7 +748,7 @@ int main(int argc, char *const argv[]) #ifndef TUNEFS /* mkfs.lustre */ /* Check whether the disk has already been formatted by mkfs.lustre */ if (!(mop.mo_flags & MO_FORCEFORMAT)) { - ret = is_lustre_target(&mop); + ret = osd_is_lustre(mop.mo_device, &mount_type); if (ret) { fatal(); fprintf(stderr, "Device %s was previously formatted " @@ -1715,16 +760,31 @@ int main(int argc, char *const argv[]) } /* Format the backing filesystem */ - ret = make_lustre_backfs(&mop); + ret = osd_make_lustre(&mop); if (ret != 0) { fatal(); fprintf(stderr, "mkfs failed %d\n", ret); goto out; } +#else + /* update svname with '=' to refresh config */ + if (mop.mo_ldd.ldd_flags & LDD_F_WRITECONF) { + struct mount_opts opts; + opts.mo_ldd = mop.mo_ldd; + opts.mo_source = mop.mo_device; + (void) osd_label_lustre(&opts); + } + + /* Enable quota accounting */ + if (mop.mo_flags & MO_QUOTA) { + ret = osd_enable_quota(&mop); + goto out; + } + #endif /* Write our config files */ - ret = write_local_files(&mop); + ret = osd_write_ldd(&mop); if (ret != 0) { fatal(); fprintf(stderr, "failed to write local files\n"); @@ -1733,6 +793,7 @@ int main(int argc, char *const argv[]) out: loop_cleanup(&mop); + osd_fini(); /* Fix any crazy return values from system() */ if (ret && ((ret & 255) == 0))