X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmkfs_lustre.c;h=87ba9b65440ef7c56f8fbd6d67166b697bc80985;hp=130e8400a146faac157afa0b47ad909c40e73f03;hb=848f9e20320cb7c01eaf7f1b5c27f5efd54e4818;hpb=5b66e3d696b0e51687e129bfd5beaee35c7c8393 diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 130e840..87ba9b6 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -1,28 +1,53 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Nathan Rutman + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of Lustre, http://www.lustre.org. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * + * GPL HEADER END */ - /* This source file is compiled into both mkfs.lustre and tunefs.lustre */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/utils/mkfs_lustre.c + * + * Author: Nathan Rutman +*/ + +/* This source file is compiled into both mkfs.lustre and tunefs.lustre */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif /* HAVE_CONFIG_H */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif +#include "mount_utils.h" #include #include #include @@ -33,14 +58,13 @@ #include #include #include +#include #include #include #include +#include -#ifdef __linux__ -# include /* for BLKGETSIZE64 */ -#endif #include #include #include @@ -50,573 +74,97 @@ #define PATH_MAX 4096 #endif -#define MAX_LOOP_DEVICES 16 -#define L_BLOCK_SIZE 4096 -#define INDEX_UNASSIGNED 0xFFFF -#define MO_IS_LOOP 0x01 -#define MO_FORCEFORMAT 0x02 - -/* used to describe the options to format the lustre disk, not persistent */ -struct mkfs_opts { - struct lustre_disk_data mo_ldd; /* to be written in MOUNT_DATA_FILE */ - char mo_device[128]; /* disk device name */ - char mo_mkfsopts[128]; /* options to the backing-store mkfs */ - char mo_loopdev[128]; /* in case a loop dev is needed */ - __u64 mo_device_sz; /* in KB */ - int mo_stripe_count; - int mo_flags; - int mo_mgs_failnodes; -}; - -static char *progname; -static int verbose = 1; +char *progname; +int verbose = 1; static int print_only = 0; +static int upgrade_to_18 = 0; - -void usage(FILE *out) -{ - fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); - fprintf(out, "usage: %s [options] \n", progname); - fprintf(out, - "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" - "\ttarget types:\n" - "\t\t--ost: object storage, mutually exclusive with mdt,mgs\n" - "\t\t--mdt: metadata storage, mutually exclusive with ost\n" - "\t\t--mgs: configuration management service - one per site\n" - "\toptions (in order of popularity):\n" - "\t\t--mgsnode=[,<...>] : NID(s) of a remote mgs node\n" - "\t\t\trequired for all targets other than the mgs node\n" - "\t\t--fsname= : default is 'lustre'\n" - "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" - "\t\t--param = : set a permanent parameter\n" - "\t\t\te.g. --param sys.timeout=40\n" - "\t\t\t --param lov.stripesize=2M\n" - "\t\t--index=#N : target index (i.e. ost index within the lov)\n" - /* FIXME implement 1.6.x - "\t\t--configdev=: store configuration info\n" - "\t\t\tfor this device on an alternate device\n" - */ - "\t\t--comment=: arbitrary user string (%d bytes)\n" - "\t\t--mountfsoptions= : permanent mount options\n" -#ifndef TUNEFS - "\t\t--backfstype= : backing fs type (ext3, ldiskfs)\n" - "\t\t--device-size=#N(KB) : device size for loop devices\n" - "\t\t--mkfsoptions= : format options\n" - "\t\t--reformat: overwrite an existing disk\n" - "\t\t--stripe-count-hint=#N : used for optimizing MDT inode size\n" +#ifdef HAVE_LDISKFS_OSD +#define FSLIST_LDISKFS "ldiskfs" +#define HAVE_FSLIST #else - "\t\t--erase-params : erase all old parameter settings\n" - "\t\t--nomgs: turn off MGS service on this MDT\n" - "\t\t--writeconf: erase all config logs for this fs.\n" -#endif - "\t\t--dryrun: just report what we would do; " - "don't write to disk\n" - "\t\t--verbose : e.g. show mkfs progress\n" - "\t\t--quiet\n", - (int)sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); - return; -} - -#define vprint if (verbose > 0) printf -#define verrprint if (verbose >= 0) printf - -static void fatal(void) -{ - verbose = 0; - fprintf(stderr, "\n%s FATAL: ", progname); -} - -/*================ utility functions =====================*/ - -char *strscat(char *dst, char *src, int buflen) { - dst[buflen - 1] = 0; - if (strlen(dst) + strlen(src) >= buflen) { - fprintf(stderr, "string buffer overflow (max %d): '%s' + '%s'" - "\n", buflen, dst, src); - exit(EOVERFLOW); - } - return strcat(dst, src); - -} - -char *strscpy(char *dst, char *src, int buflen) { - dst[0] = 0; - return strscat(dst, src, buflen); -} - -inline unsigned int -dev_major (unsigned long long int __dev) -{ - return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff); -} - -inline unsigned int -dev_minor (unsigned long long int __dev) -{ - return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff); -} - -int get_os_version() -{ - static int version = 0; - - if (!version) { - int fd; - char release[4] = ""; - - fd = open("/proc/sys/kernel/osrelease", O_RDONLY); - if (fd < 0) - fprintf(stderr, "%s: Warning: Can't resolve kernel " - "version, assuming 2.6\n", progname); - else { - read(fd, release, 4); - close(fd); - } - if (strncmp(release, "2.4.", 4) == 0) - version = 24; - else - version = 26; - } - return version; -} - -int run_command(char *cmd, int cmdsz) -{ - char log[] = "/tmp/mkfs_logXXXXXX"; - int fd = -1, rc; - - if ((cmdsz - strlen(cmd)) < 6) { - fatal(); - fprintf(stderr, "Command buffer overflow: %.*s...\n", - cmdsz, cmd); - return ENOMEM; - } - - if (verbose > 1) { - printf("cmd: %s\n", cmd); - } else { - if ((fd = mkstemp(log)) >= 0) { - close(fd); - strcat(cmd, " >"); - strcat(cmd, log); - } - } - strcat(cmd, " 2>&1"); - - /* Can't use popen because we need the rv of the command */ - rc = system(cmd); - if (rc && (fd >= 0)) { - char buf[128]; - FILE *fp; - fp = fopen(log, "r"); - if (fp) { - while (fgets(buf, sizeof(buf), fp) != NULL) { - printf(" %s", buf); - } - fclose(fp); - } - } - if (fd >= 0) - remove(log); - return rc; -} - -static int check_mtab_entry(char *spec) -{ - FILE *fp; - struct mntent *mnt; - - fp = setmntent(MOUNTED, "r"); - if (fp == NULL) - return(0); - - while ((mnt = getmntent(fp)) != NULL) { - if (strcmp(mnt->mnt_fsname, spec) == 0) { - endmntent(fp); - fprintf(stderr, "%s: according to %s %s is " - "already mounted on %s\n", - progname, MOUNTED, spec, mnt->mnt_dir); - return(EEXIST); - } - } - endmntent(fp); - - return(0); -} - -/*============ disk dev functions ===================*/ - -/* Setup a file in the first unused loop_device */ -int loop_setup(struct mkfs_opts *mop) -{ - char loop_base[20]; - char l_device[64]; - int i, ret = 0; - - /* Figure out the loop device names */ - if (!access("/dev/loop0", F_OK | R_OK)) { - strcpy(loop_base, "/dev/loop\0"); - } else if (!access("/dev/loop/0", F_OK | R_OK)) { - strcpy(loop_base, "/dev/loop/\0"); - } else { - fprintf(stderr, "%s: can't access loop devices\n", progname); - return EACCES; - } - - /* Find unused loop device */ - for (i = 0; i < MAX_LOOP_DEVICES; i++) { - char cmd[PATH_MAX]; - int cmdsz = sizeof(cmd); - sprintf(l_device, "%s%d", loop_base, i); - if (access(l_device, F_OK | R_OK)) - break; - snprintf(cmd, cmdsz, "losetup %s > /dev/null 2>&1", l_device); - ret = system(cmd); - - /* losetup gets 1 (ret=256) for non-set-up device */ - if (ret) { - /* Set up a loopback device to our file */ - snprintf(cmd, cmdsz, "losetup %s %s", l_device, - mop->mo_device); - ret = run_command(cmd, cmdsz); - if (ret) { - fprintf(stderr, "%s: error %d on losetup: %s\n", - progname, ret, strerror(ret)); - return ret; - } - strscpy(mop->mo_loopdev, l_device, - sizeof(mop->mo_loopdev)); - return ret; - } - } - - fprintf(stderr, "%s: out of loop devices!\n", progname); - return EMFILE; -} - -int loop_cleanup(struct mkfs_opts *mop) -{ - char cmd[150]; - int ret = 1; - if ((mop->mo_flags & MO_IS_LOOP) && *mop->mo_loopdev) { - sprintf(cmd, "losetup -d %s", mop->mo_loopdev); - ret = run_command(cmd, sizeof(cmd)); - } - return ret; -} + #define FSLIST_LDISKFS "" +#endif /* HAVE_LDISKFS_OSD */ +#ifdef HAVE_ZFS_OSD + #ifdef HAVE_FSLIST + #define FSLIST_ZFS "|zfs" + #else + #define FSLIST_ZFS "zfs" + #define HAVE_FSLIST + #endif +#else + #define FSLIST_ZFS "" +#endif /* HAVE_ZFS_OSD */ -/* Determine if a device is a block device (as opposed to a file) */ -int is_block(char* devname) -{ - struct stat st; - int ret = 0; +#ifndef HAVE_FSLIST + #error "no backing OSD types (ldiskfs or ZFS) are configured" +#endif - ret = access(devname, F_OK); - if (ret != 0) - return 0; - ret = stat(devname, &st); - if (ret != 0) { - fprintf(stderr, "%s: cannot stat %s\n", progname, devname); - return -1; - } - return S_ISBLK(st.st_mode); -} +#define FSLIST FSLIST_LDISKFS FSLIST_ZFS -__u64 get_device_size(char* device) +void usage(FILE *out) { - int ret, fd; - __u64 size = 0; - - fd = open(device, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: cannot open %s: %s\n", - progname, device, strerror(errno)); - return 0; - } - -#ifdef BLKGETSIZE64 - /* size in bytes. bz5831 */ - ret = ioctl(fd, BLKGETSIZE64, (void*)&size); + fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); + fprintf(out, "usage: %s [--backfstype="FSLIST"] " + "--fsname=\n" + "\t--index= [options] \n", progname); +#ifdef HAVE_ZFS_OSD + fprintf(out, "usage: %s --backfstype=zfs " + "--fsname= [options]\n" + "\t/\n" + "\t[[] [ ...] [vdev type>] ...]\n", + progname); +#endif + fprintf(out, + "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" +#ifdef HAVE_ZFS_OSD + "\t: name of ZFS pool where target is created " + "(e.g. tank)\n" + "\t: name of new dataset, must be unique within " + "pool (e.g. ost1)\n" + "\t: type of vdev (mirror, raidz, raidz2, spare, " + "cache, log)\n" +#endif + "\n" + "\ttarget types:\n" + "\t\t--mgs: configuration management service\n" + "\t\t--mdt: metadata storage, mutually exclusive with ost\n" + "\t\t--ost: object storage, mutually exclusive with mdt, mgs\n" + "\toptions (in order of popularity):\n" + "\t\t--index=#N: numerical target index (0..N)\n" + "\t\t\trequired for all targets other than the MGS\n" + "\t\t--fsname=<8_char_filesystem_name>: fs targets belong to\n" + "\t\t\trequired for all targets other than MGS\n" + "\t\t--mgsnode=[,<...>]: NID(s) of remote MGS\n" + "\t\t\trequired for all targets other than MGS\n" + "\t\t--mountfsoptions=: permanent mount options\n" + "\t\t--failnode=[,<...>]: NID(s) of backup failover node\n" + "\t\t\tmutually exclusive with --servicenode\n" + "\t\t--servicenode=[,<...>]: NID(s) of service partners\n" + "\t\t\ttreat nodes as equal service node, mutually exclusive " + "with --failnode\n" + "\t\t--param =: set a permanent parameter\n" + "\t\t\te.g. --param sys.timeout=40\n" + "\t\t\t --param lov.stripesize=2M\n" + "\t\t--network=[,<...>]: restrict OST/MDT to network(s)\n" +#ifndef TUNEFS + "\t\t--backfstype=: backing fs type (ext3, ldiskfs)\n" + "\t\t--device-size=#N(KB): device size for loop devices\n" + "\t\t--mkfsoptions=: format options\n" + "\t\t--reformat: overwrite an existing disk\n" + "\t\t--stripe-count-hint=#N: for optimizing MDT inode size\n" #else - { - __u32 lsize = 0; - /* size in blocks */ - ret = ioctl(fd, BLKGETSIZE, (void*)&lsize); - size = (__u64)lsize * 512; - } + "\t\t--erase-params: erase all old parameter settings\n" + "\t\t--nomgs: turn off MGS service on this MDT\n" + "\t\t--writeconf: erase all config logs for this fs.\n" + "\t\t--quota: enable space accounting on old 2.x device.\n" #endif - close(fd); - if (ret < 0) { - fprintf(stderr, "%s: size ioctl failed: %s\n", - progname, strerror(errno)); - return 0; - } - - vprint("device size = "LPU64"MB\n", size >> 20); - /* return value in KB */ - return size >> 10; -} - -int loop_format(struct mkfs_opts *mop) -{ - int ret = 0; - - if (mop->mo_device_sz == 0) { - fatal(); - fprintf(stderr, "loop device requires a --device-size= " - "param\n"); - return EINVAL; - } - - ret = creat(mop->mo_device, S_IRUSR|S_IWUSR); - if (ret < 0) { - ret = errno; - fprintf(stderr, "%s: Unable to create backing store: %d\n", - progname, ret); - } else { - close(ret); - } - - ret = truncate(mop->mo_device, mop->mo_device_sz * 1024); - if (ret != 0) { - ret = errno; - fprintf(stderr, "%s: Unable to truncate backing store: %d\n", - progname, ret); - } - - return ret; -} - -/* Check whether the file exists in the device */ -static int file_in_dev(char *file_name, char *dev_name) -{ - FILE *fp; - char debugfs_cmd[256]; - unsigned int inode_num; - int i; - - /* Construct debugfs command line. */ - snprintf(debugfs_cmd, sizeof(debugfs_cmd), - "debugfs -c -R 'stat %s' %s 2>&1 | egrep '(Inode|unsupported)'", - file_name, dev_name); - - fp = popen(debugfs_cmd, "r"); - if (!fp) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return 0; - } - - if (fscanf(fp, "Inode: %u", &inode_num) == 1) { /* exist */ - pclose(fp); - return 1; - } - i = fread(debugfs_cmd, 1, sizeof(debugfs_cmd), fp); - if (i) { - debugfs_cmd[i] = 0; - fprintf(stderr, "%s", debugfs_cmd); - if (strstr(debugfs_cmd, "unsupported feature")) { - fprintf(stderr, "In all likelihood, the " - "'unsupported feature' is 'extents', which " - "older debugfs does not understand.\n" - "Use e2fsprogs-1.38-cfs1 or later, available " - "from ftp://ftp.lustre.org/pub/lustre/other/" - "e2fsprogs/\n"); - } - return -1; - } - pclose(fp); - return 0; -} - -/* Check whether the device has already been used with lustre */ -static int is_lustre_target(struct mkfs_opts *mop) -{ - int rc; - - vprint("checking for existing Lustre data: "); - - if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))) { - vprint("found %s\n", - (rc == 1) ? MOUNT_DATA_FILE : "extents"); - /* in the -1 case, 'extents' means this really IS a lustre - target */ - return rc; - } - - if ((rc = file_in_dev(LAST_RCVD, mop->mo_device))) { - vprint("found %s\n", LAST_RCVD); - return rc; - } - - vprint("not found\n"); - return 0; /* The device is not a lustre target. */ -} - -/* Build fs according to type */ -int make_lustre_backfs(struct mkfs_opts *mop) -{ - char mkfs_cmd[PATH_MAX]; - char buf[64]; - char *dev; - int ret = 0; - int block_count = 0; - - if (mop->mo_device_sz != 0) { - if (mop->mo_device_sz < 8096){ - fprintf(stderr, "%s: size of filesystem must be larger " - "than 8MB, but is set to %lldKB\n", - progname, (long long)mop->mo_device_sz); - return EINVAL; - } - block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10); - } - - if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) || - (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) || - (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS2)) { - __u64 device_sz = mop->mo_device_sz; - - /* we really need the size */ - if (device_sz == 0) { - device_sz = get_device_size(mop->mo_device); - if (device_sz == 0) - return ENODEV; - } - - /* Journal size in MB */ - if (strstr(mop->mo_mkfsopts, "-J") == NULL) { - /* Choose our own default journal size */ - long journal_sz = 0, max_sz; - if (device_sz > 1024 * 1024) /* 1GB */ - journal_sz = (device_sz / 102400) * 4; - /* cap journal size at 1GB */ - if (journal_sz > 1024L) - journal_sz = 1024L; - /* man mkfs.ext3 */ - max_sz = (256000 * L_BLOCK_SIZE) >> 20; /* 1GB */ - if (journal_sz > max_sz) - journal_sz = max_sz; - if (journal_sz) { - sprintf(buf, " -J size=%ld", journal_sz); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - } - - /* Bytes_per_inode: disk size / num inodes */ - if (strstr(mop->mo_mkfsopts, "-i") == NULL) { - long bytes_per_inode = 0; - - if (IS_MDT(&mop->mo_ldd)) - bytes_per_inode = 4096; - - /* Allocate fewer inodes on large OST devices. Most - filesystems can be much more aggressive than even - this. */ - if ((IS_OST(&mop->mo_ldd) && (device_sz > 100000000))) - bytes_per_inode = 16384; /* > 100 Gb device */ - - - if (bytes_per_inode > 0) { - sprintf(buf, " -i %ld", bytes_per_inode); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - } - - /* Inode size (for extended attributes). The LOV EA size is - * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data), - * and we want some margin above that for ACLs, other EAs... */ - if (strstr(mop->mo_mkfsopts, "-I") == NULL) { - long inode_size = 0; - if (IS_MDT(&mop->mo_ldd)) { - if (mop->mo_stripe_count > 72) - inode_size = 512; /* bz 7241 */ - /* cray stripes across all osts (>60) */ - else if (mop->mo_stripe_count > 32) - inode_size = 2048; - else if (mop->mo_stripe_count > 10) - inode_size = 1024; - else - inode_size = 512; - } else if (IS_OST(&mop->mo_ldd)) { - /* now as we store fids in EA on OST we need - to make inode bigger */ - inode_size = 256; - } - - if (inode_size > 0) { - sprintf(buf, " -I %ld", inode_size); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - } - - if (verbose < 2) { - strscat(mop->mo_mkfsopts, " -q", - sizeof(mop->mo_mkfsopts)); - } - - if (strstr(mop->mo_mkfsopts, "-O") == NULL) { - /* Enable hashed b-tree directory lookup in large dirs - bz6224 */ - strscat(mop->mo_mkfsopts, " -O dir_index", - sizeof(mop->mo_mkfsopts)); - /* ldiskfs2: do not initialize all groups. */ - if (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS2) - strscat(mop->mo_mkfsopts, ",uninit_groups", - sizeof(mop->mo_mkfsopts)); - } - - /* Allow reformat of full devices (as opposed to - partitions.) We already checked for mounted dev. */ - strscat(mop->mo_mkfsopts, " -F", sizeof(mop->mo_mkfsopts)); - - snprintf(mkfs_cmd, sizeof(mkfs_cmd), - "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE, - mop->mo_ldd.ldd_svname); - } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) { - long journal_sz = 0; /* FIXME default journal size */ - if (journal_sz > 0) { - sprintf(buf, " --journal_size %ld", journal_sz); - strscat(mop->mo_mkfsopts, buf, - sizeof(mop->mo_mkfsopts)); - } - snprintf(mkfs_cmd, sizeof(mkfs_cmd), "mkreiserfs -ff "); - } else { - fprintf(stderr,"%s: unsupported fs type: %d (%s)\n", - progname, mop->mo_ldd.ldd_mount_type, - MT_STR(&mop->mo_ldd)); - return EINVAL; - } - - /* For loop device format the dev, not the filename */ - dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) - dev = mop->mo_loopdev; - - vprint("formatting backing filesystem %s on %s\n", - MT_STR(&mop->mo_ldd), dev); - vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname); - vprint("\t4k blocks %d\n", block_count); - vprint("\toptions %s\n", mop->mo_mkfsopts); - - /* mkfs_cmd's trailing space is important! */ - strscat(mkfs_cmd, mop->mo_mkfsopts, sizeof(mkfs_cmd)); - strscat(mkfs_cmd, " ", sizeof(mkfs_cmd)); - strscat(mkfs_cmd, dev, sizeof(mkfs_cmd)); - if (block_count != 0) { - sprintf(buf, " %d", block_count); - strscat(mkfs_cmd, buf, sizeof(mkfs_cmd)); - } - - vprint("mkfs_cmd = %s\n", mkfs_cmd); - ret = run_command(mkfs_cmd, sizeof(mkfs_cmd)); - if (ret) { - fatal(); - fprintf(stderr, "Unable to build fs %s (%d)\n", dev, ret); - } - return ret; + "\t\t--comment=: arbitrary string (%d bytes)\n" + "\t\t--dryrun: report what we would do; don't write to disk\n" + "\t\t--verbose: e.g. show mkfs progress\n" + "\t\t--quiet\n", + (int)sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); + return; } /* ==================== Lustre config functions =============*/ @@ -634,7 +182,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); - printf(" (%s%s%s%s%s%s%s%s)\n", + printf(" (%s%s%s%s%s%s%s%s%s)\n", IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", @@ -642,6 +190,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_VIRGIN ? "first_time ":"", ldd->ldd_flags & LDD_F_UPDATE ? "update ":"", ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"", + ldd->ldd_flags & LDD_F_NO_PRIMNODE? "no_primnode ":"", ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); printf("Parameters:%s\n", ldd->ldd_params); @@ -650,295 +199,20 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("\n"); } -/* Write the server config files */ -int write_local_files(struct mkfs_opts *mop) -{ - char mntpt[] = "/tmp/mntXXXXXX"; - char filepnm[128]; - char *dev; - FILE *filep; - int ret = 0; - - /* Mount this device temporarily in order to write these files */ - if (!mkdtemp(mntpt)) { - fprintf(stderr, "%s: Can't create temp mount point %s: %s\n", - progname, mntpt, strerror(errno)); - return errno; - } - - dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) - dev = mop->mo_loopdev; - - ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL); - if (ret) { - fprintf(stderr, "%s: Unable to mount %s: %s\n", - progname, dev, strerror(errno)); - ret = errno; - if (errno == ENODEV) { - fprintf(stderr, "Is the %s module available?\n", - MT_STR(&mop->mo_ldd)); - } - goto out_rmdir; - } - - /* Set up initial directories */ - sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR); - ret = mkdir(filepnm, 0777); - if ((ret != 0) && (errno != EEXIST)) { - fprintf(stderr, "%s: Can't make configs dir %s (%s)\n", - progname, filepnm, strerror(errno)); - goto out_umnt; - } else if (errno == EEXIST) { - ret = 0; - } - - sprintf(filepnm, "%s/%s", mntpt, "ROOT"); - ret = mkdir(filepnm, 0777); - if ((ret != 0) && (errno != EEXIST)) { - fprintf(stderr, "%s: Can't make ROOT dir %s (%s)\n", - progname, filepnm, strerror(errno)); - goto out_umnt; - } else if (errno == EEXIST) { - ret = 0; - } - - /* Save the persistent mount data into a file. Lustre must pre-read - this file to get the real mount options. */ - vprint("Writing %s\n", MOUNT_DATA_FILE); - sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE); - filep = fopen(filepnm, "w"); - if (!filep) { - fprintf(stderr, "%s: Unable to create %s file: %s\n", - progname, filepnm, strerror(errno)); - goto out_umnt; - } - fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); - fclose(filep); - - /* COMPAT_146 */ -#ifdef TUNEFS - /* Check for upgrade */ - if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) - == (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) { - char cmd[128]; - char *term; - int cmdsz = sizeof(cmd); - vprint("Copying old logs\n"); - - /* Copy the old client log to fsname-client */ - sprintf(filepnm, "%s/%s/%s-client", - mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname); - snprintf(cmd, cmdsz, "cp %s/%s/client %s", mntpt, MDT_LOGS_DIR, - filepnm); - ret = run_command(cmd, cmdsz); - if (ret) { - fprintf(stderr, "%s: Can't copy 1.4 config %s/client " - "(%d)\n", progname, MDT_LOGS_DIR, ret); - fprintf(stderr, "mount -t ldiskfs %s somewhere, " - "find the client log for fs %s and " - "copy it manually into %s/%s-client, " - "then umount.\n", - mop->mo_device, - mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, - mop->mo_ldd.ldd_fsname); - goto out_umnt; - } - - /* We need to use the old mdt log because otherwise mdt won't - have complete lov if old clients connect before all - servers upgrade. */ - /* Copy the old mdt log to fsname-MDT0000 (get old - name from mdt_UUID) */ - ret = 1; - strscpy(filepnm, (char *)mop->mo_ldd.ldd_uuid, sizeof(filepnm)); - term = strstr(filepnm, "_UUID"); - if (term) { - *term = '\0'; - snprintf(cmd, cmdsz, "cp %s/%s/%s %s/%s/%s", - mntpt, MDT_LOGS_DIR, filepnm, - mntpt, MOUNT_CONFIGS_DIR, - mop->mo_ldd.ldd_svname); - ret = run_command(cmd, cmdsz); - } - if (ret) { - fprintf(stderr, "%s: Can't copy 1.4 config %s/%s " - "(%d)\n", progname, MDT_LOGS_DIR, filepnm, ret); - fprintf(stderr, "mount -t ext3 %s somewhere, " - "find the MDT log for fs %s and " - "copy it manually into %s/%s, " - "then umount.\n", - mop->mo_device, - mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, - mop->mo_ldd.ldd_svname); - goto out_umnt; - } - } -#endif - /* end COMPAT_146 */ - - -out_umnt: - umount(mntpt); -out_rmdir: - rmdir(mntpt); - return ret; -} - -int read_local_files(struct mkfs_opts *mop) -{ - char tmpdir[] = "/tmp/dirXXXXXX"; - char cmd[PATH_MAX]; - char filepnm[128]; - char *dev; - FILE *filep; - int ret = 0; - int cmdsz = sizeof(cmd); - - /* Make a temporary directory to hold Lustre data files. */ - if (!mkdtemp(tmpdir)) { - fprintf(stderr, "%s: Can't create temporary directory %s: %s\n", - progname, tmpdir, strerror(errno)); - return errno; - } - - dev = mop->mo_device; - - /* Construct debugfs command line. */ - snprintf(cmd, cmdsz, "debugfs -c -R 'dump /%s %s/mountdata' %s", - MOUNT_DATA_FILE, tmpdir, dev); - - ret = run_command(cmd, cmdsz); - if (ret) - verrprint("%s: Unable to dump %s dir (%d)\n", - progname, MOUNT_CONFIGS_DIR, ret); - - sprintf(filepnm, "%s/mountdata", tmpdir); - filep = fopen(filepnm, "r"); - if (filep) { - vprint("Reading %s\n", MOUNT_DATA_FILE); - fread(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); - } else { - /* COMPAT_146 */ - /* Try to read pre-1.6 config from last_rcvd */ - struct lr_server_data lsd; - verrprint("%s: Unable to read %s (%s).\n", - progname, filepnm, strerror(errno)); - - verrprint("Trying last_rcvd\n"); - sprintf(filepnm, "%s/%s", tmpdir, LAST_RCVD); - - /* Construct debugfs command line. */ - snprintf(cmd, cmdsz, "debugfs -c -R 'dump /%s %s' %s", - LAST_RCVD, filepnm, dev); - - ret = run_command(cmd, cmdsz); - if (ret) { - fprintf(stderr, "%s: Unable to dump %s file (%d)\n", - progname, LAST_RCVD, ret); - goto out_rmdir; - } - - filep = fopen(filepnm, "r"); - if (!filep) { - fprintf(stderr, "%s: Unable to open %s: %s\n", - progname, filepnm, strerror(errno)); - ret = errno; - verrprint("Contents of %s:\n", tmpdir); - verbose+=2; - snprintf(cmd, cmdsz, "ls -l %s/", tmpdir); - run_command(cmd, cmdsz); - verrprint("Contents of disk:\n"); - snprintf(cmd, cmdsz, "debugfs -c -R 'ls -l /' %s", dev); - run_command(cmd, cmdsz); - - goto out_rmdir; - } - vprint("Reading %s\n", LAST_RCVD); - ret = fread(&lsd, 1, sizeof(lsd), filep); - if (ret < sizeof(lsd)) { - fprintf(stderr, "%s: Short read (%d of %d)\n", - progname, ret, (int)sizeof(lsd)); - ret = ferror(filep); - if (ret) - goto out_close; - } - vprint("Feature compat=%x, incompat=%x\n", - lsd.lsd_feature_compat, lsd.lsd_feature_incompat); - - if ((lsd.lsd_feature_compat & OBD_COMPAT_OST) || - (lsd.lsd_feature_incompat & OBD_INCOMPAT_OST)) { - mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST; - mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index; - } else if ((lsd.lsd_feature_compat & OBD_COMPAT_MDT) || - (lsd.lsd_feature_incompat & OBD_INCOMPAT_MDT)) { - /* We must co-locate so mgs can see old logs. - If user doesn't want this, they can copy the old - logs manually and re-tunefs. */ - mop->mo_ldd.ldd_flags = - LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS; - mop->mo_ldd.ldd_svindex = lsd.lsd_mdt_index; - } else { - /* If neither is set, we're pre-1.4.6, make a guess. */ - /* Construct debugfs command line. */ - snprintf(cmd, cmdsz, "debugfs -c -R 'rdump /%s %s' %s", - MDT_LOGS_DIR, tmpdir, dev); - run_command(cmd, cmdsz); - - sprintf(filepnm, "%s/%s", tmpdir, MDT_LOGS_DIR); - if (lsd.lsd_ost_index > 0) { - mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST; - mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index; - } else { - /* If there's a LOGS dir, it's an MDT */ - if ((ret = access(filepnm, F_OK)) == 0) { - mop->mo_ldd.ldd_flags = - LDD_F_SV_TYPE_MDT | - LDD_F_SV_TYPE_MGS; - /* Old MDT's are always index 0 - (pre CMD) */ - mop->mo_ldd.ldd_svindex = 0; - } else { - /* The index may not be correct */ - mop->mo_ldd.ldd_flags = - LDD_F_SV_TYPE_OST | LDD_F_NEED_INDEX; - verrprint("OST with unknown index\n"); - } - } - } - - ret = 0; - memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid, - sizeof(mop->mo_ldd.ldd_uuid)); - mop->mo_ldd.ldd_flags |= LDD_F_UPGRADE14; - } - /* end COMPAT_146 */ -out_close: - fclose(filep); - -out_rmdir: - snprintf(cmd, cmdsz, "rm -rf %s", tmpdir); - run_command(cmd, cmdsz); - if (ret) - verrprint("Failed to read old data (%d)\n", ret); - return ret; -} - - void set_defaults(struct mkfs_opts *mop) { - mop->mo_ldd.ldd_magic = LDD_MAGIC; - mop->mo_ldd.ldd_config_ver = 1; - mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; - mop->mo_mgs_failnodes = 0; - strcpy(mop->mo_ldd.ldd_fsname, "lustre"); - if (get_os_version() == 24) - mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3; - else - mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; - - mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; - mop->mo_stripe_count = 1; + mop->mo_ldd.ldd_magic = LDD_MAGIC; + mop->mo_ldd.ldd_config_ver = 1; + mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; +#ifdef HAVE_LDISKFS_OSD + mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; +#else + mop->mo_ldd.ldd_mount_type = LDD_MT_ZFS; +#endif + mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; + mop->mo_mgs_failnodes = 0; + mop->mo_stripe_count = 1; + mop->mo_pool_vdevs = NULL; } static inline void badopt(const char *opt, char *type) @@ -948,30 +222,12 @@ static inline void badopt(const char *opt, char *type) usage(stderr); } -static int add_param(char *buf, char *key, char *val) -{ - int end = sizeof(((struct lustre_disk_data *)0)->ldd_params); - int start = strlen(buf); - int keylen = 0; - - if (key) - keylen = strlen(key); - if (start + 1 + keylen + strlen(val) >= end) { - fprintf(stderr, "%s: params are too long-\n%s %s%s\n", - progname, buf, key ? key : "", val); - return 1; - } - - sprintf(buf + start, " %s%s", key ? key : "", val); - return 0; -} - /* from mount_lustre */ /* Get rid of symbolic hostnames for tcp, since kernel can't do lookups */ #define MAXNIDSTR 1024 static char *convert_hostnames(char *s1) { - char *converted, *s2 = 0, *c; + char *converted, *s2 = 0, *c, *end, sep; int left = MAXNIDSTR; lnet_nid_t nid; @@ -980,33 +236,37 @@ static char *convert_hostnames(char *s1) return NULL; } + end = s1 + strlen(s1); c = converted; - while ((left > 0) && ((s2 = strsep(&s1, ",: \0")))) { - nid = libcfs_str2nid(s2); + while ((left > 0) && (s1 < end)) { + s2 = strpbrk(s1, ",:"); + if (!s2) + s2 = end; + sep = *s2; + *s2 = '\0'; + nid = libcfs_str2nid(s1); + *s2 = sep; + if (nid == LNET_NID_ANY) { - if (*s2 == '/') - /* end of nids */ - break; fprintf(stderr, "%s: Can't parse NID '%s'\n", - progname, s2); + progname, s1); free(converted); return NULL; } - if (strncmp(libcfs_nid2str(nid), "127.0.0.1", strlen("127.0.0.1")) == 0) { fprintf(stderr, "%s: The NID '%s' resolves to the " "loopback address '%s'. Lustre requires a " "non-loopback address.\n", - progname, s2, libcfs_nid2str(nid)); + progname, s1, libcfs_nid2str(nid)); free(converted); return NULL; } - c += snprintf(c, left, "%s,", libcfs_nid2str(nid)); + c += snprintf(c, left, "%s%c", libcfs_nid2str(nid), sep); left = converted + MAXNIDSTR - c; + s1 = s2 + 1; } - *(c - 1) = '\0'; return converted; } @@ -1039,13 +299,18 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"print", 0, 0, 'n'}, {"quiet", 0, 0, 'q'}, {"reformat", 0, 0, 'r'}, + {"servicenode", 1, 0, 's'}, {"verbose", 0, 0, 'v'}, {"writeconf", 0, 0, 'w'}, + {"upgrade_to_18", 0, 0, 'U'}, + {"network", 1, 0, 't'}, + {"quota", 0, 0, 'Q'}, {0, 0, 0, 0} }; - char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw"; + char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrs:t:Uu:vw"; int opt; int rc, longidx; + int failnode_set = 0, servicenode_set = 0; while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { @@ -1059,6 +324,11 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } i++; } + if (i == LDD_MT_LAST) { + fprintf(stderr, "%s: invalid backend filesystem" + " type %s\n", progname, optarg); + return 1; + } break; } case 'c': @@ -1087,23 +357,35 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; break; - case 'f': { - char *nids = convert_hostnames(optarg); + case 'f': + case 's': { + char *nids; + + if ((opt == 'f' && servicenode_set) + || (opt == 's' && failnode_set)) { + fprintf(stderr, "%s: %s cannot use with --%s\n", + progname, long_opt[longidx].name, + opt == 'f' ? "servicenode" : "failnode"); + return 1; + } + + nids = convert_hostnames(optarg); if (!nids) return 1; rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, nids); - /* Combo needs to add MDT failnodes as MGS failnodes - as well */ - if (!rc && IS_MGS(&mop->mo_ldd)) { - rc = add_param(mop->mo_ldd.ldd_params, - PARAM_MGSNODE, nids); - } free(nids); if (rc) return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + if (opt == 'f') { + failnode_set = 1; + } else { + mop->mo_ldd.ldd_flags |= LDD_F_NO_PRIMNODE; + servicenode_set = 1; + } + mop->mo_flags |= MO_FAILOVER; break; } case 'G': @@ -1196,6 +478,22 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, case 'r': mop->mo_flags |= MO_FORCEFORMAT; break; + case 't': + if (!IS_MDT(&mop->mo_ldd) && !IS_OST(&mop->mo_ldd)) { + badopt(long_opt[longidx].name, "MDT,OST"); + return 1; + } + + if (!optarg) + return 1; + + rc = add_param(mop->mo_ldd.ldd_params, + PARAM_NETWORK, optarg); + if (rc != 0) + return rc; + /* Must update the mgs logs */ + mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + break; case 'u': strscpy(mop->mo_ldd.ldd_userdata, optarg, sizeof(mop->mo_ldd.ldd_userdata)); @@ -1206,6 +504,12 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, case 'w': mop->mo_ldd.ldd_flags |= LDD_F_WRITECONF; break; + case 'U': + upgrade_to_18 = 1; + break; + case 'Q': + mop->mo_flags |= MO_QUOTA; + break; default: if (opt != '?') { fatal(); @@ -1215,236 +519,23 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } }//while - /* Last arg is device */ - if (optind != argc - 1) { - fatal(); - fprintf(stderr, "Bad argument: %s\n", argv[optind]); - return EINVAL; - } - - return 0; -} - -#include - -#define LDISKFS_IOC_GETVERSION _IOR('f', 3, long) - -#ifndef TUNEFS /* mkfs.lustre */ -static int mkfs_iam_insert(int key_need_convert, char *keybuf, - int rec_need_convert, char *recbuf, char *filename) -{ - int fd; - int ret; - struct iam_uapi_info ua; - - fd = iam_open(filename, &ua); - if (fd < 0) { - fprintf(stderr, "failed to iam_open %s\n", filename); - return 1; - } - - ret = iam_insert(fd, &ua, - key_need_convert, keybuf, - rec_need_convert, recbuf); - iam_close(fd); - if (ret) { - fprintf(stderr, "failed to iam_insert %s\n", filename); - return 1; - } else { - return 0; - } -} - -static int touch_file(char *filename) -{ - int fd; - - if (filename == NULL) { - return 1; - } - - fd = open(filename, O_CREAT | O_TRUNC, 0600); - if (fd < 0) { - return 1; - } else { - close(fd); - return 0; - } -} - -static int get_generation(char *filename, unsigned long *result) -{ - int fd; - int ret; - - if (filename == NULL) { - return 1; - } - - fd = open(filename, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s: failed to open %s\n", - __FUNCTION__, filename); - return 1; - } - - ret = ioctl(fd, LDISKFS_IOC_GETVERSION, result); - close(fd); + if (optind == argc) { + /* The user didn't specify device name */ + fatal(); + fprintf(stderr, "Not enough arguments - device name or " + "pool/dataset name not specified.\n"); + return EINVAL; + } else { + /* The device or pool/filesystem name */ + strscpy(mop->mo_device, argv[optind], sizeof(mop->mo_device)); + + /* Followed by optional vdevs */ + if (optind < argc - 1) + mop->mo_pool_vdevs = (char **) &argv[optind + 1]; + } - return ((ret < 0) ? ret : 0); -} - -static int mkfs_mdt(struct mkfs_opts *mop) -{ - char mntpt[] = "/tmp/mntXXXXXX"; - char fstype[] = "ldiskfs"; - char filepnm[128]; - char recbuf[64]; - char *source; - int ret; - unsigned long generation; - struct stat st; - - source = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) { - source = mop->mo_loopdev; - } - - if ((source == NULL) || (*source == 0)) { - return 1; - } - - if (!mkdtemp(mntpt)) { - fprintf(stderr, "%s: failed to mkdtemp %s\n", - __FUNCTION__, mntpt); - return errno; - } - - ret = mount(source, mntpt, fstype, 0, NULL); - if (ret) { - goto out_rmdir; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "seq_ctl"); - ret = touch_file(filepnm); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "seq_srv"); - ret = touch_file(filepnm); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "last_received"); - ret = touch_file(filepnm); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "lov_objid"); - ret = touch_file(filepnm); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); - ret = iam_creat(filepnm, FMT_LVAR, L_BLOCK_SIZE, 4, 17, 4); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "fld"); - ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 8, 8, 4); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "orphans"); - ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 20, 8, 4); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi.16"); - ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 16, 8, 4); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi.5"); - ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 5, 8, 4); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, CAPA_KEYS); - ret = touch_file(filepnm); - if (ret) { - goto out_umount; - } - - umount(mntpt); - ret = mount(source, mntpt, fstype, 0, NULL); - if (ret) { - goto out_rmdir; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); - ret = iam_polymorph(filepnm, 040755); - if (ret) { - perror("IAM_IOC_POLYMORPH"); - goto out_umount; - } - - umount(mntpt); - ret = mount(source, mntpt, fstype, 0, NULL); - if (ret) { - goto out_rmdir; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "fld"); - ret = mkfs_iam_insert(1, "0000000000000002", 1, "0000000000000000", filepnm); - if (ret) { - goto out_umount; - } - - ret = mkfs_iam_insert(1, "0000000000000001", 1, "0000000000000000", filepnm); - if (ret) { - goto out_umount; - } - - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); - ret = stat(filepnm, &st); - if (ret) { - goto out_umount; - } - - ret = get_generation(filepnm, &generation); - if (ret) { - goto out_umount; - } - - snprintf(recbuf, sizeof(recbuf) - 1, "110000000000000001%8.8x%8.8x", - (unsigned int)st.st_ino, (unsigned int)generation); - ret = mkfs_iam_insert(0, ".", 1, recbuf, filepnm); - if (ret) { - goto out_umount; - } - - ret = mkfs_iam_insert(0, "..", 1, recbuf, filepnm); - if (ret) { - goto out_umount; - } - -out_umount: - umount(mntpt); -out_rmdir: - rmdir(mntpt); - return ret; + return 0; } -#endif int main(int argc, char *const argv[]) { @@ -1453,6 +544,7 @@ int main(int argc, char *const argv[]) char *mountopts = NULL; char always_mountopts[512] = ""; char default_mountopts[512] = ""; + unsigned mount_type; int ret = 0; if ((progname = strrchr(argv[0], '/')) != NULL) @@ -1471,19 +563,16 @@ int main(int argc, char *const argv[]) /* device is last arg */ strscpy(mop.mo_device, argv[argc - 1], sizeof(mop.mo_device)); - /* Are we using a loop device? */ - ret = is_block(mop.mo_device); - if (ret < 0) - goto out; - if (ret == 0) - mop.mo_flags |= MO_IS_LOOP; + ret = osd_init(); + if (ret) + return ret; #ifdef TUNEFS /* For tunefs, we must read in the old values before parsing any new ones. */ /* Check whether the disk has already been formatted by mkfs.lustre */ - ret = is_lustre_target(&mop); + ret = osd_is_lustre(mop.mo_device, &mount_type); if (ret == 0) { fatal(); fprintf(stderr, "Device %s has not been formatted with " @@ -1491,14 +580,27 @@ int main(int argc, char *const argv[]) ret = ENODEV; goto out; } + mop.mo_ldd.ldd_mount_type = mount_type; - ret = read_local_files(&mop); + ret = osd_read_ldd(mop.mo_device, &mop.mo_ldd); if (ret) { fatal(); fprintf(stderr, "Failed to read previous Lustre data from %s " "(%d)\n", mop.mo_device, ret); goto out; } + mop.mo_ldd.ldd_flags &= ~(LDD_F_WRITECONF | LDD_F_VIRGIN); + + /* svname of the form lustre:OST1234 means never registered */ + ret = strlen(mop.mo_ldd.ldd_svname); + if (mop.mo_ldd.ldd_svname[ret - 8] == ':') { + mop.mo_ldd.ldd_svname[ret - 8] = '-'; + mop.mo_ldd.ldd_flags |= LDD_F_VIRGIN; + } else if (mop.mo_ldd.ldd_svname[ret - 8] == '=') { + mop.mo_ldd.ldd_svname[ret - 8] = '-'; + mop.mo_ldd.ldd_flags |= LDD_F_WRITECONF; + } + if (strstr(mop.mo_ldd.ldd_params, PARAM_MGSNODE)) mop.mo_mgs_failnodes++; @@ -1526,6 +628,10 @@ int main(int argc, char *const argv[]) goto out; } + /* Stand alone MGS doesn't need a index */ + if (!IS_MDT(ldd) && IS_MGS(ldd)) + mop.mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX; + if ((mop.mo_ldd.ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPGRADE14)) == (LDD_F_NEED_INDEX | LDD_F_UPGRADE14)) { fatal(); @@ -1534,6 +640,18 @@ int main(int argc, char *const argv[]) ret = EINVAL; goto out; } + + if (mop.mo_ldd.ldd_flags & LDD_F_NEED_INDEX) + fprintf(stderr, "warning: %s: for Lustre 2.4 and later, the " + "target index must be specified with --index\n", + mop.mo_device); + + /* If no index is supplied for MDT by default set index to zero */ + if (IS_MDT(ldd) && (ldd->ldd_svindex == INDEX_UNASSIGNED)) { + mop.mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX; + mop.mo_ldd.ldd_svindex = 0; + } + #if 0 /* * Comment out these 2 checks temporarily, since for multi-MDSes @@ -1554,51 +672,31 @@ int main(int argc, char *const argv[]) goto out; } #endif + if ((IS_MDT(ldd) || IS_OST(ldd)) && mop.mo_ldd.ldd_fsname[0] == '\0') { + fatal(); + fprintf(stderr, "Must specify --fsname for MDT/OST device\n"); + ret = EINVAL; + goto out; + } /* These are the permanent mount options (always included) */ - switch (ldd->ldd_mount_type) { - case LDD_MT_EXT3: - case LDD_MT_LDISKFS: - case LDD_MT_LDISKFS2: { - sprintf(always_mountopts, "errors=remount-ro"); - if (IS_MDT(ldd) || IS_MGS(ldd)) - strscat(always_mountopts, ",iopen_nopriv,user_xattr", - sizeof(always_mountopts)); - if ((get_os_version() == 24) && IS_OST(ldd)) - strscat(always_mountopts, ",asyncdel", - sizeof(always_mountopts)); - /* NB: Files created while extents are enabled cannot be read - if mounted with a kernel that doesn't include the CFS - patches! */ - if (IS_OST(ldd) && - (ldd->ldd_mount_type == LDD_MT_LDISKFS || - ldd->ldd_mount_type == LDD_MT_LDISKFS2)) { - strscat(default_mountopts, ",extents,mballoc", - sizeof(default_mountopts)); - } - break; - } - case LDD_MT_SMFS: { - mop.mo_flags |= MO_IS_LOOP; - sprintf(always_mountopts, "type=ext3,dev=%s", - mop.mo_device); - break; - } - default: { - fatal(); - fprintf(stderr, "unknown fs type %d '%s'\n", - ldd->ldd_mount_type, - MT_STR(ldd)); - ret = EINVAL; - goto out; - } - } + ret = osd_prepare_lustre(&mop, + default_mountopts, sizeof(default_mountopts), + always_mountopts, sizeof(always_mountopts)); + if (ret) { + fatal(); + fprintf(stderr, "unable to prepare backend (%d)\n", ret); + goto out; + } if (mountopts) { - /* If user specifies mount opts, don't use defaults, - but always use always_mountopts */ - sprintf(ldd->ldd_mount_opts, "%s,%s", - always_mountopts, mountopts); + trim_mountfsoptions(mountopts); + (void)check_mountfsoptions(mountopts, default_mountopts, 1); + if (check_mountfsoptions(mountopts, always_mountopts, 0)) { + ret = EINVAL; + goto out; + } + sprintf(ldd->ldd_mount_opts, "%s", mountopts); } else { #ifdef TUNEFS if (ldd->ldd_mount_opts[0] == 0) @@ -1607,6 +705,7 @@ int main(int argc, char *const argv[]) { sprintf(ldd->ldd_mount_opts, "%s%s", always_mountopts, default_mountopts); + trim_mountfsoptions(ldd->ldd_mount_opts); } } @@ -1621,8 +720,8 @@ int main(int argc, char *const argv[]) goto out; } - if (check_mtab_entry(mop.mo_device)) - return(EEXIST); + if (check_mtab_entry(mop.mo_device, mop.mo_device, NULL, NULL)) + return(EEXIST); /* Create the loopback file */ if (mop.mo_flags & MO_IS_LOOP) { @@ -1631,8 +730,11 @@ int main(int argc, char *const argv[]) ret = errno; #ifndef TUNEFS /* mkfs.lustre */ /* Reformat the loopback file */ - if (ret || (mop.mo_flags & MO_FORCEFORMAT)) + if (ret || (mop.mo_flags & MO_FORCEFORMAT)) { ret = loop_format(&mop); + if (ret) + goto out; + } #endif if (ret == 0) ret = loop_setup(&mop); @@ -1647,7 +749,7 @@ int main(int argc, char *const argv[]) #ifndef TUNEFS /* mkfs.lustre */ /* Check whether the disk has already been formatted by mkfs.lustre */ if (!(mop.mo_flags & MO_FORCEFORMAT)) { - ret = is_lustre_target(&mop); + ret = osd_is_lustre(mop.mo_device, &mount_type); if (ret) { fatal(); fprintf(stderr, "Device %s was previously formatted " @@ -1659,34 +761,40 @@ int main(int argc, char *const argv[]) } /* Format the backing filesystem */ - ret = make_lustre_backfs(&mop); + ret = osd_make_lustre(&mop); if (ret != 0) { fatal(); fprintf(stderr, "mkfs failed %d\n", ret); goto out; } +#else + /* update svname with '=' to refresh config */ + if (mop.mo_ldd.ldd_flags & LDD_F_WRITECONF) { + struct mount_opts opts; + opts.mo_ldd = mop.mo_ldd; + opts.mo_source = mop.mo_device; + (void) osd_label_lustre(&opts); + } + + /* Enable quota accounting */ + if (mop.mo_flags & MO_QUOTA) { + ret = osd_enable_quota(&mop); + goto out; + } + #endif /* Write our config files */ - ret = write_local_files(&mop); + ret = osd_write_ldd(&mop); if (ret != 0) { fatal(); fprintf(stderr, "failed to write local files\n"); goto out; } -#ifndef TUNEFS /* mkfs.lustre */ - if (IS_MDT(ldd)) { - ret = mkfs_mdt(&mop); - if (ret != 0) { - fprintf(stderr, "failed to mkfs_mdt\n"); - goto out; - } - } -#endif - out: loop_cleanup(&mop); + osd_fini(); /* Fix any crazy return values from system() */ if (ret && ((ret & 255) == 0))