X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Fmkfs_lustre.c;h=0b1880f54a1a268f304a0702980b3fd62d79395e;hb=43b34aced1895199412940138fd7d0f4a3f845e3;hp=dc5ba4e496edf880bba414acb91c4b009f67a1ab;hpb=552e2278ef040f1f4a14767406692a0274f30a09;p=fs%2Flustre-release.git diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index dc5ba4e..0b1880f 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -33,13 +33,15 @@ #include #include #include +#include #include #include #include #ifdef __linux__ -# include /* for BLKGETSIZE64 */ +#include /* for BLKGETSIZE64 */ +#include #endif #include #include @@ -59,26 +61,25 @@ /* used to describe the options to format the lustre disk, not persistent */ struct mkfs_opts { struct lustre_disk_data mo_ldd; /* to be written in MOUNT_DATA_FILE */ - char mo_mount_type_string[20]; /* "ext3", "ldiskfs", ... */ char mo_device[128]; /* disk device name */ char mo_mkfsopts[128]; /* options to the backing-store mkfs */ char mo_loopdev[128]; /* in case a loop dev is needed */ __u64 mo_device_sz; /* in KB */ int mo_stripe_count; - int mo_flags; + int mo_flags; int mo_mgs_failnodes; }; static char *progname; static int verbose = 1; static int print_only = 0; - +static int failover = 0; void usage(FILE *out) { fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); fprintf(out, "usage: %s [options] \n", progname); - fprintf(out, + fprintf(out, "\t:block device or file (e.g /dev/sda or /tmp/ost1)\n" "\ttarget types:\n" "\t\t--ost: object storage, mutually exclusive with mdt,mgs\n" @@ -129,7 +130,23 @@ static void fatal(void) /*================ utility functions =====================*/ -inline unsigned int +char *strscat(char *dst, char *src, int buflen) { + dst[buflen - 1] = 0; + if (strlen(dst) + strlen(src) >= buflen) { + fprintf(stderr, "string buffer overflow (max %d): '%s' + '%s'" + "\n", buflen, dst, src); + exit(EOVERFLOW); + } + return strcat(dst, src); + +} + +char *strscpy(char *dst, char *src, int buflen) { + dst[0] = 0; + return strscat(dst, src, buflen); +} + +inline unsigned int dev_major (unsigned long long int __dev) { return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff); @@ -150,16 +167,16 @@ int get_os_version() char release[4] = ""; fd = open("/proc/sys/kernel/osrelease", O_RDONLY); - if (fd < 0) + if (fd < 0) fprintf(stderr, "%s: Warning: Can't resolve kernel " "version, assuming 2.6\n", progname); else { read(fd, release, 4); close(fd); } - if (strncmp(release, "2.4.", 4) == 0) + if (strncmp(release, "2.4.", 4) == 0) version = 24; - else + else version = 26; } return version; @@ -169,7 +186,7 @@ int run_command(char *cmd, int cmdsz) { char log[] = "/tmp/mkfs_logXXXXXX"; int fd = -1, rc; - + if ((cmdsz - strlen(cmd)) < 6) { fatal(); fprintf(stderr, "Command buffer overflow: %.*s...\n", @@ -201,10 +218,10 @@ int run_command(char *cmd, int cmdsz) fclose(fp); } } - if (fd >= 0) + if (fd >= 0) remove(log); return rc; -} +} static int check_mtab_entry(char *spec) { @@ -252,12 +269,13 @@ int loop_setup(struct mkfs_opts *mop) for (i = 0; i < MAX_LOOP_DEVICES; i++) { char cmd[PATH_MAX]; int cmdsz = sizeof(cmd); + sprintf(l_device, "%s%d", loop_base, i); - if (access(l_device, F_OK | R_OK)) + if (access(l_device, F_OK | R_OK)) break; snprintf(cmd, cmdsz, "losetup %s > /dev/null 2>&1", l_device); ret = system(cmd); - + /* losetup gets 1 (ret=256) for non-set-up device */ if (ret) { /* Set up a loopback device to our file */ @@ -269,18 +287,19 @@ int loop_setup(struct mkfs_opts *mop) progname, ret, strerror(ret)); return ret; } - strcpy(mop->mo_loopdev, l_device); + strscpy(mop->mo_loopdev, l_device, + sizeof(mop->mo_loopdev)); return ret; } } - + fprintf(stderr, "%s: out of loop devices!\n", progname); return EMFILE; -} +} int loop_cleanup(struct mkfs_opts *mop) { - char cmd[128]; + char cmd[150]; int ret = 1; if ((mop->mo_flags & MO_IS_LOOP) && *mop->mo_loopdev) { sprintf(cmd, "losetup -d %s", mop->mo_loopdev); @@ -296,7 +315,7 @@ int is_block(char* devname) int ret = 0; ret = access(devname, F_OK); - if (ret != 0) + if (ret != 0) return 0; ret = stat(devname, &st); if (ret != 0) { @@ -306,14 +325,14 @@ int is_block(char* devname) return S_ISBLK(st.st_mode); } -__u64 get_device_size(char* device) +__u64 get_device_size(char* device) { int ret, fd; __u64 size = 0; fd = open(device, O_RDONLY); if (fd < 0) { - fprintf(stderr, "%s: cannot open %s: %s\n", + fprintf(stderr, "%s: cannot open %s: %s\n", progname, device, strerror(errno)); return 0; } @@ -326,16 +345,16 @@ __u64 get_device_size(char* device) __u32 lsize = 0; /* size in blocks */ ret = ioctl(fd, BLKGETSIZE, (void*)&lsize); - size = (__u64)lsize * 512; + size = (__u64)lsize * 512; } #endif close(fd); if (ret < 0) { - fprintf(stderr, "%s: size ioctl failed: %s\n", + fprintf(stderr, "%s: size ioctl failed: %s\n", progname, strerror(errno)); return 0; } - + vprint("device size = "LPU64"MB\n", size >> 20); /* return value in KB */ return size >> 10; @@ -344,7 +363,7 @@ __u64 get_device_size(char* device) int loop_format(struct mkfs_opts *mop) { int ret = 0; - + if (mop->mo_device_sz == 0) { fatal(); fprintf(stderr, "loop device requires a --device-size= " @@ -356,13 +375,39 @@ int loop_format(struct mkfs_opts *mop) ret = truncate(mop->mo_device, mop->mo_device_sz * 1024); if (ret != 0) { ret = errno; - fprintf(stderr, "%s: Unable to create backing store: %d\n", + fprintf(stderr, "%s: Unable to create backing store: %d\n", progname, ret); } return ret; } +/* Display the need for the latest e2fsprogs to be installed. make_backfs + * indicates if the caller is make_lustre_backfs() or not. */ +static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) +{ + static int msg_displayed; + + if (msg_displayed) { + fprintf(stderr, "WARNING: e2fsprogs does not support %s " + "feature.\n\n", feature); + return; + } + + msg_displayed++; + + fprintf(stderr, "WARNING: The e2fsprogs package currently installed on " + "your system does not support \"%s\" feature.\nPlease install " + "the latest version of e2fsprogs from http://www.clusterfs.com/" + "downloads/public/Lustre/Tools/e2fsprogs/\nto enable this " + "feature.\n", feature); + + if (make_backfs) + fprintf(stderr, "Feature will not be enabled until e2fsprogs " + "is updated and 'tune2fs -O %s %%{device}' " + "is run.\n\n", feature); +} + /* Check whether the file exists in the device */ static int file_in_dev(char *file_name, char *dev_name) { @@ -372,7 +417,7 @@ static int file_in_dev(char *file_name, char *dev_name) int i; /* Construct debugfs command line. */ - snprintf(debugfs_cmd, sizeof(debugfs_cmd), + snprintf(debugfs_cmd, sizeof(debugfs_cmd), "debugfs -c -R 'stat %s' %s 2>&1 | egrep '(Inode|unsupported)'", file_name, dev_name); @@ -391,12 +436,7 @@ static int file_in_dev(char *file_name, char *dev_name) debugfs_cmd[i] = 0; fprintf(stderr, "%s", debugfs_cmd); if (strstr(debugfs_cmd, "unsupported feature")) { - fprintf(stderr, "In all likelihood, the " - "'unsupported feature' is 'extents', which " - "older debugfs does not understand.\n" - "Use e2fsprogs-1.38-cfs1 or later, available " - "from ftp://ftp.lustre.org/pub/lustre/other/" - "e2fsprogs/\n"); + disp_old_e2fsprogs_msg("an unknown", 0); } return -1; } @@ -409,33 +449,116 @@ static int is_lustre_target(struct mkfs_opts *mop) { int rc; vprint("checking for existing Lustre data: "); - + if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))) { - vprint("found %s\n", + vprint("found %s\n", (rc == 1) ? MOUNT_DATA_FILE : "extents"); /* in the -1 case, 'extents' means this really IS a lustre target */ return rc; } - - if ((rc = file_in_dev(LAST_RCVD, mop->mo_device))) { + + if ((rc = file_in_dev(LAST_RCVD, mop->mo_device))) { vprint("found %s\n", LAST_RCVD); - return rc; + return rc; } vprint("not found\n"); return 0; /* The device is not a lustre target. */ } +/* Check if a certain feature is supported by e2fsprogs. + * Firstly we try to use "debugfs supported_features" command to check if + * the feature is supported. If this fails we try to set this feature with + * mke2fs to check for its support. */ +static int is_e2fsprogs_feature_supp(const char *feature) +{ + FILE *fp; + char cmd[PATH_MAX]; + char imgname[] = "/tmp/test-img-XXXXXX"; + int fd = -1; + int ret = 0; + + snprintf(cmd, sizeof(cmd), + "debugfs -c -R \"supported_features %s\" 2>&1", feature); + + /* Using popen() instead of run_command() since debugfs does not return + * proper error code if command is not supported */ + fp = popen(cmd, "r"); + if (!fp) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return 0; + } + ret = fread(cmd, 1, sizeof(cmd), fp); + if (ret > 0) { + if (strstr(cmd, feature) && !(strstr(cmd, "Unknown"))) + return 0; + } + + if ((fd = mkstemp(imgname)) < 0) + return -1; + + snprintf(cmd, sizeof(cmd), "mke2fs -F -O %s %s 100 >/dev/null 2>&1", + feature, imgname); + /* run_command() displays the output of mke2fs when it fails for + * some feature, so use system() directly */ + ret = system(cmd); + if (fd >= 0) + remove(imgname); + + return ret; +} + +static void disp_old_kernel_msg(char *feature) +{ + fprintf(stderr, "WARNING: ldiskfs filesystem does not support \"%s\" " + "feature.\n\n", feature); +} + +static void enable_default_backfs_features(struct mkfs_opts *mop) +{ + struct utsname uts; + int maj_high, maj_low, min; + int ret; + + strscat(mop->mo_mkfsopts, " -O dir_index", sizeof(mop->mo_mkfsopts)); + + if (is_e2fsprogs_feature_supp("uninit_groups") == 0) + strscat(mop->mo_mkfsopts, ",uninit_groups", + sizeof(mop->mo_mkfsopts)); + else + disp_old_e2fsprogs_msg("uninit_groups", 1); + + ret = uname(&uts); + if (ret) + return; + + sscanf(uts.release, "%d.%d.%d", &maj_high, &maj_low, &min); + printf("%d %d %d\n", maj_high, maj_low, min); + + /* Multiple mount protection is enabled only if failover node is + * specified and if kernel version is higher than 2.6.9 */ + if (failover) { + if (KERNEL_VERSION(maj_high, maj_low, min) >= + KERNEL_VERSION(2,6,9)) { + if (is_e2fsprogs_feature_supp("mmp") == 0) + strscat(mop->mo_mkfsopts, ",mmp", + sizeof(mop->mo_mkfsopts)); + else + disp_old_e2fsprogs_msg("mmp", 1); + } else { + disp_old_kernel_msg("mmp"); + } + } +} /* Build fs according to type */ int make_lustre_backfs(struct mkfs_opts *mop) { char mkfs_cmd[PATH_MAX]; - char buf[40]; + char buf[64]; char *dev; int ret = 0; int block_count = 0; - int left = sizeof(mkfs_cmd); if (mop->mo_device_sz != 0) { if (mop->mo_device_sz < 8096){ @@ -448,7 +571,8 @@ int make_lustre_backfs(struct mkfs_opts *mop) } if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) || - (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) { + (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) || + (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS2)) { __u64 device_sz = mop->mo_device_sz; /* we really need the size */ @@ -470,30 +594,34 @@ int make_lustre_backfs(struct mkfs_opts *mop) journal_sz = max_sz; if (journal_sz) { sprintf(buf, " -J size=%ld", journal_sz); - strcat(mop->mo_mkfsopts, buf); + strscat(mop->mo_mkfsopts, buf, + sizeof(mop->mo_mkfsopts)); } } /* bytes_per_inode: disk size / num inodes */ if (strstr(mop->mo_mkfsopts, "-i") == NULL) { long bytes_per_inode = 0; - - if (IS_MDT(&mop->mo_ldd)) + + if (IS_MDT(&mop->mo_ldd)) bytes_per_inode = 4096; /* Allocate fewer inodes on large OST devices. Most - filesystems can be much more aggressive than even + filesystems can be much more aggressive than even this. */ - if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000))) + if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000))) bytes_per_inode = 16384; - + if (bytes_per_inode > 0) { sprintf(buf, " -i %ld", bytes_per_inode); - strcat(mop->mo_mkfsopts, buf); + strscat(mop->mo_mkfsopts, buf, + sizeof(mop->mo_mkfsopts)); } } - - /* Inode size (for extended attributes) */ + + /* Inode size (for extended attributes). The LOV EA size is + * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data), + * and we want some margin above that for ACLs, other EAs... */ if (strstr(mop->mo_mkfsopts, "-I") == NULL) { long inode_size = 0; if (IS_MDT(&mop->mo_ldd)) { @@ -514,48 +642,48 @@ int make_lustre_backfs(struct mkfs_opts *mop) if (inode_size > 0) { sprintf(buf, " -I %ld", inode_size); - strcat(mop->mo_mkfsopts, buf); + strscat(mop->mo_mkfsopts, buf, + sizeof(mop->mo_mkfsopts)); } - } if (verbose < 2) { - strcat(mop->mo_mkfsopts, " -q"); + strscat(mop->mo_mkfsopts, " -q", + sizeof(mop->mo_mkfsopts)); } - /* Enable hashed b-tree directory lookup in large dirs bz6224 */ - if (strstr(mop->mo_mkfsopts, "-O") == NULL) { - strcat(mop->mo_mkfsopts, " -O dir_index"); - } + if (strstr(mop->mo_mkfsopts, "-O") == NULL) + enable_default_backfs_features(mop); - /* Allow reformat of full devices (as opposed to + /* Allow reformat of full devices (as opposed to partitions.) We already checked for mounted dev. */ - strcat(mop->mo_mkfsopts, " -F"); + strscat(mop->mo_mkfsopts, " -F", sizeof(mop->mo_mkfsopts)); - left -= snprintf(mkfs_cmd, left, - "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE, - mop->mo_ldd.ldd_svname); + snprintf(mkfs_cmd, sizeof(mkfs_cmd), + "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE, + mop->mo_ldd.ldd_svname); } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) { long journal_sz = 0; /* FIXME default journal size */ - if (journal_sz > 0) { + if (journal_sz > 0) { sprintf(buf, " --journal_size %ld", journal_sz); - strcat(mop->mo_mkfsopts, buf); + strscat(mop->mo_mkfsopts, buf, + sizeof(mop->mo_mkfsopts)); } - left -= snprintf(mkfs_cmd, left, "mkreiserfs -ff "); + snprintf(mkfs_cmd, sizeof(mkfs_cmd), "mkreiserfs -ff "); } else { fprintf(stderr,"%s: unsupported fs type: %d (%s)\n", - progname, mop->mo_ldd.ldd_mount_type, + progname, mop->mo_ldd.ldd_mount_type, MT_STR(&mop->mo_ldd)); return EINVAL; } /* For loop device format the dev, not the filename */ dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) + if (mop->mo_flags & MO_IS_LOOP) dev = mop->mo_loopdev; - + vprint("formatting backing filesystem %s on %s\n", MT_STR(&mop->mo_ldd), dev); vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname); @@ -563,16 +691,12 @@ int make_lustre_backfs(struct mkfs_opts *mop) vprint("\toptions %s\n", mop->mo_mkfsopts); /* mkfs_cmd's trailing space is important! */ - strncat(mkfs_cmd, mop->mo_mkfsopts, left); - left = sizeof(mkfs_cmd) - strlen(mkfs_cmd) - 1; - strncat(mkfs_cmd, " ", left); - left = sizeof(mkfs_cmd) - strlen(mkfs_cmd) - 1; - strncat(mkfs_cmd, dev, left); - left = sizeof(mkfs_cmd) - strlen(mkfs_cmd) - 1; + strscat(mkfs_cmd, mop->mo_mkfsopts, sizeof(mkfs_cmd)); + strscat(mkfs_cmd, " ", sizeof(mkfs_cmd)); + strscat(mkfs_cmd, dev, sizeof(mkfs_cmd)); if (block_count != 0) { sprintf(buf, " %d", block_count); - strncat(mkfs_cmd, buf, left); - left = sizeof(mkfs_cmd) - strlen(mkfs_cmd) - 1; + strscat(mkfs_cmd, buf, sizeof(mkfs_cmd)); } vprint("mkfs_cmd = %s\n", mkfs_cmd); @@ -590,7 +714,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) { printf("\n %s:\n", str); printf("Target: %s\n", ldd->ldd_svname); - if (ldd->ldd_svindex == INDEX_UNASSIGNED) + if (ldd->ldd_svindex == INDEX_UNASSIGNED) printf("Index: unassigned\n"); else printf("Index: %d\n", ldd->ldd_svindex); @@ -600,7 +724,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); printf(" (%s%s%s%s%s%s%s%s)\n", - IS_MDT(ldd) ? "MDT ":"", + IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", ldd->ldd_flags & LDD_F_NEED_INDEX ? "needs_index ":"", @@ -632,16 +756,16 @@ int write_local_files(struct mkfs_opts *mop) } dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) + if (mop->mo_flags & MO_IS_LOOP) dev = mop->mo_loopdev; - + ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL); if (ret) { - fprintf(stderr, "%s: Unable to mount %s: %s\n", + fprintf(stderr, "%s: Unable to mount %s: %s\n", progname, dev, strerror(errno)); ret = errno; if (errno == ENODEV) { - fprintf(stderr, "Is the %s module available?\n", + fprintf(stderr, "Is the %s module available?\n", MT_STR(&mop->mo_ldd)); } goto out_rmdir; @@ -651,7 +775,7 @@ int write_local_files(struct mkfs_opts *mop) sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR); ret = mkdir(filepnm, 0777); if ((ret != 0) && (errno != EEXIST)) { - fprintf(stderr, "%s: Can't make configs dir %s: %s\n", + fprintf(stderr, "%s: Can't make configs dir %s: %s\n", progname, filepnm, strerror(errno)); goto out_umnt; } else if (errno == EEXIST) { @@ -670,19 +794,19 @@ int write_local_files(struct mkfs_opts *mop) } fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); fclose(filep); - + /* COMPAT_146 */ #ifdef TUNEFS /* Check for upgrade */ - if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) + if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) == (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) { char cmd[128]; char *term; int cmdsz = sizeof(cmd); vprint("Copying old logs\n"); - + /* Copy the old client log to fsname-client */ - sprintf(filepnm, "%s/%s/%s-client", + sprintf(filepnm, "%s/%s/%s-client", mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname); snprintf(cmd, cmdsz, "cp %s/%s/client %s", mntpt, MDT_LOGS_DIR, filepnm); @@ -694,24 +818,24 @@ int write_local_files(struct mkfs_opts *mop) "find the client log for fs %s and " "copy it manually into %s/%s-client, " "then umount.\n", - mop->mo_device, + mop->mo_device, mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname); goto out_umnt; } /* We need to use the old mdt log because otherwise mdt won't - have complete lov if old clients connect before all + have complete lov if old clients connect before all servers upgrade. */ /* Copy the old mdt log to fsname-MDT0000 (get old name from mdt_UUID) */ ret = 1; - strcpy(filepnm, mop->mo_ldd.ldd_uuid); + strscpy(filepnm, (char *)mop->mo_ldd.ldd_uuid, sizeof(filepnm)); term = strstr(filepnm, "_UUID"); if (term) { *term = '\0'; snprintf(cmd, cmdsz, "cp %s/%s/%s %s/%s/%s", - mntpt, MDT_LOGS_DIR, filepnm, + mntpt, MDT_LOGS_DIR, filepnm, mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_svname); ret = run_command(cmd, cmdsz); @@ -723,7 +847,7 @@ int write_local_files(struct mkfs_opts *mop) "find the MDT log for fs %s and " "copy it manually into %s/%s, " "then umount.\n", - mop->mo_device, + mop->mo_device, mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_svname); goto out_umnt; @@ -734,7 +858,7 @@ int write_local_files(struct mkfs_opts *mop) out_umnt: - umount(mntpt); + umount(mntpt); out_rmdir: rmdir(mntpt); return ret; @@ -777,10 +901,10 @@ int read_local_files(struct mkfs_opts *mop) /* COMPAT_146 */ /* Try to read pre-1.6 config from last_rcvd */ struct lr_server_data lsd; - verrprint("%s: Unable to read %s (%s).\n", - progname, filepnm, strerror(errno)); + verrprint("%s: Unable to read %d.%d config %s.\n", + progname, LUSTRE_MAJOR, LUSTRE_MINOR, filepnm); - verrprint("Trying last_rcvd\n"); + verrprint("Trying 1.4 config from last_rcvd\n"); sprintf(filepnm, "%s/%s", tmpdir, LAST_RCVD); /* Construct debugfs command line. */ @@ -793,7 +917,7 @@ int read_local_files(struct mkfs_opts *mop) progname, LAST_RCVD, ret); goto out_rmdir; } - + filep = fopen(filepnm, "r"); if (!filep) { fprintf(stderr, "%s: Unable to open %s: %s\n", @@ -830,7 +954,7 @@ int read_local_files(struct mkfs_opts *mop) /* We must co-locate so mgs can see old logs. If user doesn't want this, they can copy the old logs manually and re-tunefs. */ - mop->mo_ldd.ldd_flags = + mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS; mop->mo_ldd.ldd_svindex = lsd.lsd_mdt_index; } else { @@ -848,9 +972,9 @@ int read_local_files(struct mkfs_opts *mop) /* If there's a LOGS dir, it's an MDT */ if ((ret = access(filepnm, F_OK)) == 0) { mop->mo_ldd.ldd_flags = - LDD_F_SV_TYPE_MDT | + LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS; - /* Old MDT's are always index 0 + /* Old MDT's are always index 0 (pre CMD) */ mop->mo_ldd.ldd_svindex = 0; } else { @@ -861,20 +985,20 @@ int read_local_files(struct mkfs_opts *mop) } } } - + ret = 0; - memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid, + memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid, sizeof(mop->mo_ldd.ldd_uuid)); mop->mo_ldd.ldd_flags |= LDD_F_UPGRADE14; } /* end COMPAT_146 */ -out_close: +out_close: fclose(filep); out_rmdir: snprintf(cmd, cmdsz, "rm -rf %s", tmpdir); run_command(cmd, cmdsz); - if (ret) + if (ret) verrprint("Failed to read old data (%d)\n", ret); return ret; } @@ -887,11 +1011,11 @@ void set_defaults(struct mkfs_opts *mop) mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; mop->mo_mgs_failnodes = 0; strcpy(mop->mo_ldd.ldd_fsname, "lustre"); - if (get_os_version() == 24) + if (get_os_version() == 24) mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3; - else + else mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; - + mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; mop->mo_stripe_count = 1; } @@ -903,14 +1027,45 @@ static inline void badopt(const char *opt, char *type) usage(stderr); } -static int add_param(char *buf, char *key, char *val) +static int clean_param(char *buf, char *key) +{ + char *sub, *next; + + if (!buf) + return 1; + if ((sub = strstr(buf, key)) != NULL) { + if ((next = strchr(sub, ' ')) != NULL) { + next++; + memmove(sub, next, strlen(next) + 1); + } else { + *sub = '\0'; + } + } + return 0; +} + +static int add_param(char *buf, char *key, char *val, int unique) { int end = sizeof(((struct lustre_disk_data *)0)->ldd_params); - int start = strlen(buf); + int start; int keylen = 0; + char *ptr; - if (key) + if (key) keylen = strlen(key); + if (unique) { + if (key) { + clean_param(buf, key); + } else { + if ((ptr = strchr(val, '=')) == NULL) + return 1; + *ptr = '\0'; + clean_param(buf, val); + *ptr = '='; + } + } + + start = strlen(buf); if (start + 1 + keylen + strlen(val) >= end) { fprintf(stderr, "%s: params are too long-\n%s %s%s\n", progname, buf, key ? key : "", val); @@ -929,26 +1084,26 @@ static char *convert_hostnames(char *s1) char *converted, *s2 = 0, *c; int left = MAXNIDSTR; lnet_nid_t nid; - + converted = malloc(left); c = converted; while ((left > 0) && ((s2 = strsep(&s1, ",: \0")))) { nid = libcfs_str2nid(s2); if (nid == LNET_NID_ANY) { - if (*s2 == '/') + if (*s2 == '/') /* end of nids */ break; - fprintf(stderr, "%s: Can't parse NID '%s'\n", + fprintf(stderr, "%s: Can't parse NID '%s'\n", progname, s2); free(converted); return NULL; } - if (strncmp(libcfs_nid2str(nid), "127.0.0.1", + if (strncmp(libcfs_nid2str(nid), "127.0.0.1", strlen("127.0.0.1")) == 0) { fprintf(stderr, "%s: The NID '%s' resolves to the " "loopback address '%s'. Lustre requires a " - "non-loopback address.\n", + "non-loopback address.\n", progname, s2, libcfs_nid2str(nid)); free(converted); return NULL; @@ -997,8 +1152,10 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw"; int opt; int rc, longidx; + int upcall = 0; + const size_t prefix_len = sizeof(PARAM_MDT_UPCALL) - 1; - while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != + while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { switch (opt) { case 'b': { @@ -1031,7 +1188,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, printf("Configdev not implemented\n"); return 1; case 'd': - mop->mo_device_sz = atol(optarg); + mop->mo_device_sz = atol(optarg); break; case 'e': mop->mo_ldd.ldd_params[0] = '\0'; @@ -1040,21 +1197,22 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, break; case 'f': { char *nids = convert_hostnames(optarg); - if (!nids) + if (!nids) return 1; - rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, - nids); + rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, + nids, 0); /* Combo needs to add MDT failnodes as MGS failnodes as well */ if (!rc && IS_MGS(&mop->mo_ldd)) { - rc = add_param(mop->mo_ldd.ldd_params, - PARAM_MGSNODE, nids); + rc = add_param(mop->mo_ldd.ldd_params, + PARAM_MGSNODE, nids, 0); } free(nids); - if (rc) + if (rc) return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + failover = 1; break; } case 'G': @@ -1064,7 +1222,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, usage(stdout); return 1; case 'i': - if (!(mop->mo_ldd.ldd_flags & + if (!(mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_VIRGIN | LDD_F_WRITECONF))) { fprintf(stderr, "%s: cannot change the index of" @@ -1080,13 +1238,13 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } break; case 'k': - strncpy(mop->mo_mkfsopts, optarg, - sizeof(mop->mo_mkfsopts) - 1); + strscpy(mop->mo_mkfsopts, optarg, + sizeof(mop->mo_mkfsopts)); break; case 'L': { char *tmp; if (!(mop->mo_flags & MO_FORCEFORMAT) && - (!(mop->mo_ldd.ldd_flags & + (!(mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_VIRGIN | LDD_F_WRITECONF)))) { fprintf(stderr, "%s: cannot change the name of" @@ -1103,18 +1261,18 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, "filesystem name\n", progname, *tmp); return 1; } - strncpy(mop->mo_ldd.ldd_fsname, optarg, - sizeof(mop->mo_ldd.ldd_fsname) - 1); + strscpy(mop->mo_ldd.ldd_fsname, optarg, + sizeof(mop->mo_ldd.ldd_fsname)); break; } case 'm': { char *nids = convert_hostnames(optarg); - if (!nids) + if (!nids) return 1; - rc = add_param(mop->mo_ldd.ldd_params, PARAM_MGSNODE, - nids); + rc = add_param(mop->mo_ldd.ldd_params, PARAM_MGSNODE, + nids, 0); free(nids); - if (rc) + if (rc) return rc; mop->mo_mgs_failnodes++; break; @@ -1135,8 +1293,20 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_OST; break; case 'p': - rc = add_param(mop->mo_ldd.ldd_params, NULL, optarg); - if (rc) + /* Test if the param is valid for mdt.group_upcall */ + if (!strncmp(optarg, PARAM_MDT_UPCALL, prefix_len)) { + upcall++; + if (strcmp(optarg + prefix_len, "NONE") && + access(optarg + prefix_len, R_OK | X_OK)) + fprintf(stderr, "WARNING: group upcall " + "parameter not executable: %s\n" + "NOTE: you can change the path " + "to the group upcall through " + "tunefs.lustre(8)\n", optarg + + prefix_len); + } + rc = add_param(mop->mo_ldd.ldd_params, NULL, optarg, 0); + if (rc) return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; @@ -1148,10 +1318,8 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, mop->mo_flags |= MO_FORCEFORMAT; break; case 'u': - strncpy(mop->mo_ldd.ldd_userdata, optarg, + strscpy(mop->mo_ldd.ldd_userdata, optarg, sizeof(mop->mo_ldd.ldd_userdata)); - mop->mo_ldd.ldd_userdata[ - sizeof(mop->mo_ldd.ldd_userdata) - 1] = 0; break; case 'v': verbose++; @@ -1164,17 +1332,33 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, fatal(); fprintf(stderr, "Unknown option '%c'\n", opt); } - usage(stderr); - return 1; + return EINVAL; } }//while - if (optind >= argc) { + + /* Last arg is device */ + if (optind != argc - 1) { fatal(); - fprintf(stderr, "Bad arguments\n"); - usage(stderr); - return 1; + fprintf(stderr, "Bad argument: %s\n", argv[optind]); + return EINVAL; } +#ifndef TUNEFS + if (mop->mo_ldd.ldd_flags & LDD_F_SV_TYPE_MDT && 0 == upcall) { + if (access("/usr/sbin/l_getgroups", R_OK | X_OK)) { + fprintf(stderr, "WARNING: MDS group upcall is not set, " + "using 'NONE'\n"); + } else { + rc = add_param(mop->mo_ldd.ldd_params, PARAM_MDT_UPCALL, + "/usr/sbin/l_getgroups", 1); + if (rc) + return rc; + /* Must update the mgs logs */ + mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + } + } +#endif + return 0; } @@ -1201,19 +1385,19 @@ int main(int argc, char *const argv[]) set_defaults(&mop); /* device is last arg */ - strcpy(mop.mo_device, argv[argc - 1]); + strscpy(mop.mo_device, argv[argc - 1], sizeof(mop.mo_device)); /* Are we using a loop device? */ ret = is_block(mop.mo_device); - if (ret < 0) + if (ret < 0) goto out; - if (ret == 0) + if (ret == 0) mop.mo_flags |= MO_IS_LOOP; #ifdef TUNEFS /* For tunefs, we must read in the old values before parsing any new ones. */ - + /* Check whether the disk has already been formatted by mkfs.lustre */ ret = is_lustre_target(&mop); if (ret == 0) { @@ -1234,16 +1418,16 @@ int main(int argc, char *const argv[]) if (strstr(mop.mo_ldd.ldd_params, PARAM_MGSNODE)) mop.mo_mgs_failnodes++; - if (verbose > 0) + if (verbose > 0) print_ldd("Read previous values", &(mop.mo_ldd)); #endif ret = parse_opts(argc, argv, &mop, &mountopts); - if (ret) + if (ret) goto out; ldd = &mop.mo_ldd; - + if (!(IS_MDT(ldd) || IS_OST(ldd) || IS_MGS(ldd))) { fatal(); fprintf(stderr, "must set target type: MDT,OST,MGS\n"); @@ -1269,30 +1453,34 @@ int main(int argc, char *const argv[]) if (!IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) { fatal(); - if (IS_MDT(ldd)) + if (IS_MDT(ldd)) fprintf(stderr, "Must specify --mgs or --mgsnode=\n"); - else + else fprintf(stderr, "Must specify --mgsnode=\n"); ret = EINVAL; goto out; } - /* These are the permanent mount options (always included) */ + /* These are the permanent mount options (always included) */ switch (ldd->ldd_mount_type) { case LDD_MT_EXT3: - case LDD_MT_LDISKFS: { + case LDD_MT_LDISKFS: + case LDD_MT_LDISKFS2: { sprintf(always_mountopts, "errors=remount-ro"); if (IS_MDT(ldd) || IS_MGS(ldd)) - strcat(always_mountopts, - ",iopen_nopriv,user_xattr"); + strscat(always_mountopts, ",iopen_nopriv,user_xattr", + sizeof(always_mountopts)); if ((get_os_version() == 24) && IS_OST(ldd)) - strcat(always_mountopts, ",asyncdel"); + strscat(always_mountopts, ",asyncdel", + sizeof(always_mountopts)); /* NB: Files created while extents are enabled cannot be read - if mounted with a kernel that doesn't include the CFS + if mounted with a kernel that doesn't include the CFS patches! */ - if (IS_OST(ldd) && - ldd->ldd_mount_type == LDD_MT_LDISKFS) { - strcat(default_mountopts, ",extents,mballoc"); + if (IS_OST(ldd) && + (ldd->ldd_mount_type == LDD_MT_LDISKFS || + ldd->ldd_mount_type == LDD_MT_LDISKFS2)) { + strscat(default_mountopts, ",extents,mballoc", + sizeof(default_mountopts)); } break; } @@ -1310,20 +1498,20 @@ int main(int argc, char *const argv[]) ret = EINVAL; goto out; } - } + } if (mountopts) { /* If user specifies mount opts, don't use defaults, but always use always_mountopts */ - sprintf(ldd->ldd_mount_opts, "%s,%s", + sprintf(ldd->ldd_mount_opts, "%s,%s", always_mountopts, mountopts); } else { #ifdef TUNEFS - if (ldd->ldd_mount_opts[0] == 0) + if (ldd->ldd_mount_opts[0] == 0) /* use the defaults unless old opts exist */ #endif { - sprintf(ldd->ldd_mount_opts, "%s%s", + sprintf(ldd->ldd_mount_opts, "%s%s", always_mountopts, default_mountopts); } } @@ -1345,14 +1533,14 @@ int main(int argc, char *const argv[]) /* Create the loopback file */ if (mop.mo_flags & MO_IS_LOOP) { ret = access(mop.mo_device, F_OK); - if (ret) + if (ret) ret = errno; #ifndef TUNEFS /* mkfs.lustre */ /* Reformat the loopback file */ if (ret || (mop.mo_flags & MO_FORCEFORMAT)) ret = loop_format(&mop); #endif - if (ret == 0) + if (ret == 0) ret = loop_setup(&mop); if (ret) { fatal(); @@ -1368,7 +1556,7 @@ int main(int argc, char *const argv[]) ret = is_lustre_target(&mop); if (ret) { fatal(); - fprintf(stderr, "Device %s was previously formatted " + fprintf(stderr, "Device %s was previously formatted " "for lustre. Use --reformat to reformat it, " "or tunefs.lustre to modify.\n", mop.mo_device); @@ -1394,9 +1582,9 @@ int main(int argc, char *const argv[]) } out: - loop_cleanup(&mop); + loop_cleanup(&mop); - /* Fix any crazy return values from system() */ + /* Fix any crazy return values from system() */ if (ret && ((ret & 255) == 0)) return (1); if (ret)