*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2014, Intel Corporation.
+ * Copyright (c) 2012, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
-#include "mount_utils.h"
-#include <stdlib.h>
+#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
-#include <stdarg.h>
#include <mntent.h>
#include <glob.h>
#ifndef BLKGETSIZE64
#include <linux/fs.h> /* for BLKGETSIZE64 */
#endif
+#include <linux/types.h>
#include <linux/version.h>
-#include <lustre_disk.h>
#include <lustre_param.h>
#include <lnet/lnetctl.h>
#include <lustre_ver.h>
#include <selinux/selinux.h>
#endif
+#include "mount_utils.h"
+
#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
#define SCHEDULER_PATH "queue/scheduler"
#define DUMMY_FILE_NAME_LEN 25
#define EXT3_DIRENT_SIZE DUMMY_FILE_NAME_LEN
+static void append_unique(char *buf, char *prefix, char *key, char *val,
+ size_t maxbuflen);
+
/*
- * Concatenate context of the temporary mount point iff selinux is enabled
+ * Concatenate context of the temporary mount point if selinux is enabled
*/
#ifdef HAVE_SELINUX
static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop)
}
if (fcontext != NULL) {
- strcat(mop->mo_ldd.ldd_mount_opts, ",context=");
- strcat(mop->mo_ldd.ldd_mount_opts, fcontext);
+ append_unique(mop->mo_ldd.ldd_mount_opts,
+ ",", "context", fcontext,
+ sizeof(mop->mo_ldd.ldd_mount_opts));
freecon(fcontext);
}
}
char enabled_features[4096] = "";
int ret = 1;
- snprintf(cmd, sizeof(cmd), "%s -R features %s 2>&1",
+ snprintf(cmd, sizeof(cmd), "%s -c -R features %s 2>&1",
DEBUGFS, devpath);
/* Using popen() instead of run_command() since debugfs does
dev = mop->mo_loopdev;
ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0,
- mop->mo_ldd.ldd_mount_opts);
+ (mop->mo_mountopts == NULL) ?
+ "errors=remount-ro" : mop->mo_mountopts);
if (ret) {
fprintf(stderr, "%s: Unable to mount %s: %s\n",
progname, dev, strerror(errno));
fclose(filep);
goto out_umnt;
}
+ fsync(filep->_fileno);
fclose(filep);
out_umnt:
return ret;
}
+/*
+ * ldiskfs implementation of the erase-ldd operation. It does nothing:
+ * neither mop nor param is used, and 0 is always returned.
+ */
+int ldiskfs_erase_ldd(struct mkfs_opts *mop, char *param)
+{
+ return 0;
+}
+
+/*
+ * Print the parameter string held in mop->mo_ldd.ldd_params to stdout,
+ * prefixed with "Parameters:" and terminated by a newline.
+ */
+void ldiskfs_print_ldd_params(struct mkfs_opts *mop)
+{
+ printf("Parameters:%s\n", mop->mo_ldd.ldd_params);
+}
/* Display the need for the latest e2fsprogs to be installed. make_backfs
* indicates if the caller is make_lustre_backfs() or not. */
strscat(buf, key, maxbuflen);
if (val != NULL) {
- strscat(buf, "=", maxbuflen);
+ strscat(buf, "=\"", maxbuflen);
strscat(buf, val, maxbuflen);
+ strscat(buf, "\"", maxbuflen);
}
}
}
{
if (IS_OST(&mop->mo_ldd)) {
append_unique(anchor, user_spec ? "," : " -O ",
- "extents", NULL, sizeof(mop->mo_mkfsopts));
+ "extents", NULL, maxbuflen);
append_unique(anchor, ",", "uninit_bg", NULL, maxbuflen);
} else if (IS_MDT(&mop->mo_ldd)) {
append_unique(anchor, user_spec ? "," : " -O ",
}
if (mop->mo_device_kb != 0) {
- if (mop->mo_device_kb < 8096) {
+ if (mop->mo_device_kb < 32384) {
fprintf(stderr, "%s: size of filesystem must be larger "
- "than 8MB, but is set to %lldKB\n",
+ "than 32MB, but is set to %lldKB\n",
progname, (long long)mop->mo_device_kb);
return EINVAL;
}
}
}
- /* Inode size includes:
+ /*
+ * The inode size is made up of the following elements
+ * (assuming all files use a composite layout and have
+ * 3 components):
+ *
* ldiskfs inode size: 156
* extended attributes size, including:
* ext4_xattr_header: 32
- * LOV EA size: 32(lov_mds_md) +
+ * LOV EA size: 32(lov_comp_md_v1) +
+ * 3 * 40(lov_comp_md_entry_v1) +
+ * 3 * 32(lov_mds_md) +
* stripes * 24(lov_ost_data) +
* 16(xattr_entry) + 3(lov)
* LMA EA size: 24(lustre_mdt_attrs) +
*
* If we say the average filename length is about 32 bytes,
* the calculation looks like:
- * 156 + 32 + (32+24*N+19) + (24+19) + (24+18+~32+20) + other <=
- * 512*2^m, {m=0,1,2,3}
+ * 156 + 32 + (32+3*(40 + 32)+24*N+19) + (24+19) +
+ * (24+18+~32+20) + other <= 512*2^m, {m=0,1,2,3}
*/
if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
if (IS_MDT(&mop->mo_ldd)) {
- if (mop->mo_stripe_count > 69)
+ if (mop->mo_stripe_count > 59)
inode_size = 512; /* bz 7241 */
/* see also "-i" below for EA blocks */
- else if (mop->mo_stripe_count > 26)
+ else if (mop->mo_stripe_count > 16)
inode_size = 2048;
- else if (mop->mo_stripe_count > 5)
- inode_size = 1024;
else
- inode_size = 512;
+ inode_size = 1024;
} else if (IS_OST(&mop->mo_ldd)) {
- /* We store MDS FID and OST objid in EA on OST
- * we need to make inode bigger as well. */
- inode_size = 256;
+ /* We store MDS FID and necessary composite
+ * layout information in the OST object EA. */
+ inode_size = 512;
}
if (inode_size > 0) {
if (IS_MDT(&mop->mo_ldd)) {
bytes_per_inode = inode_size + 1536;
- if (mop->mo_stripe_count > 69) {
+ if (mop->mo_stripe_count > 59) {
int extra = mop->mo_stripe_count * 24;
extra = ((extra - 1) | 4095) + 1;
bytes_per_inode += extra;
* descriptor blocks, but leave one block for the superblock.
* Only useful for filesystems with < 2^32 blocks due to resize
* limitations. */
- if (IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 &&
+ if (strstr(mop->mo_mkfsopts, "meta_bg") == NULL &&
+ IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 &&
mop->mo_device_kb * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) {
unsigned group_blocks = L_BLOCK_SIZE * 8;
unsigned desc_per_block = L_BLOCK_SIZE / 32;
vprint("formatting backing filesystem %s on %s\n",
MT_STR(&mop->mo_ldd), dev);
- vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
- vprint("\t4k blocks "LPU64"\n", block_count);
+ vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
+ vprint("\t4k blocks %ju\n", (uintmax_t)block_count);
vprint("\toptions %s\n", mop->mo_mkfsopts);
/* mkfs_cmd's trailing space is important! */
strscat(mkfs_cmd, " ", sizeof(mkfs_cmd));
strscat(mkfs_cmd, dev, sizeof(mkfs_cmd));
if (block_count != 0) {
- sprintf(buf, " "LPU64, block_count);
+ snprintf(buf, sizeof(buf), " %ju",
+ (uintmax_t)block_count);
strscat(mkfs_cmd, buf, sizeof(mkfs_cmd));
}
}
int ldiskfs_prepare_lustre(struct mkfs_opts *mop,
- char *default_mountopts, int default_len,
- char *always_mountopts, int always_len)
+ char *wanted_mountopts, size_t len)
{
struct lustre_disk_data *ldd = &mop->mo_ldd;
int ret;
mop->mo_flags |= MO_IS_LOOP;
}
- strscat(default_mountopts, ",errors=remount-ro", default_len);
if (IS_MDT(ldd) || IS_MGS(ldd))
- strscat(always_mountopts, ",user_xattr", always_len);
+ strscat(wanted_mountopts, ",user_xattr", len);
+
+ return 0;
+}
+
+/*
+ * Make sure the mount option string carries an "errors=" setting.
+ *
+ * When the substring "errors=" does not occur anywhere in mountopts,
+ * ",errors=remount-ro" is appended through strscat(), with len passed as
+ * its size limit (presumably the capacity of the mountopts buffer --
+ * confirm against strscat()). An existing "errors=" setting is left
+ * untouched. mop is not used. Always returns 0.
+ */
+int ldiskfs_fix_mountopts(struct mkfs_opts *mop, char *mountopts, size_t len)
+{
+ if (strstr(mountopts, "errors=") == NULL)
+ strscat(mountopts, ",errors=remount-ro", len);
return 0;
}
return rc;
}
- snprintf(real_path, sizeof(real_path), "%s/%s", path,
- MAX_HW_SECTORS_KB_PATH);
- rc = read_file(real_path, buf, sizeof(buf));
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: opening %s: %s\n",
- real_path, strerror(errno));
- /* No MAX_HW_SECTORS_KB_PATH isn't necessary an
- * error for some device. */
- goto subdevs;
+ if (mop->mo_max_sectors_kb >= 0) {
+ snprintf(buf, sizeof(buf), "%d", mop->mo_max_sectors_kb);
+ } else {
+ snprintf(real_path, sizeof(real_path), "%s/%s", path,
+ MAX_HW_SECTORS_KB_PATH);
+ rc = read_file(real_path, buf, sizeof(buf));
+ if (rc) {
+ if (verbose)
+ fprintf(stderr, "warning: opening %s: %s\n",
+ real_path, strerror(errno));
+ /* A missing MAX_HW_SECTORS_KB_PATH isn't necessarily an
+ * error for some devices. */
+ goto subdevs;
+ }
}
if (strlen(buf) - 1 > 0) {
if (newval == 0 || newval == ULLONG_MAX || end == buf)
goto subdevs;
- /* Don't increase IO request size limit past 32MB. It is about
- * 2x PTLRPC_MAX_BRW_SIZE, but that isn't in a public header. */
- if (newval > 32 * 1024) {
- newval = 32 * 1024;
+ /* Don't increase IO request size limit past 16MB. It is about
+ * PTLRPC_MAX_BRW_SIZE, but that isn't in a public header.
+ * Note that even though the block layer allows larger values,
+ * setting max_sectors_kb = 32768 causes crashes (LU-6974). */
+ if (mop->mo_max_sectors_kb < 0 && newval > 16 * 1024) {
+ newval = 16 * 1024;
snprintf(buf, sizeof(buf), "%llu", newval);
}
oldval = strtoull(oldbuf, &end, 0);
/* Don't shrink the current limit. */
- if (oldval != ULLONG_MAX && newval <= oldval)
+ if (mop->mo_max_sectors_kb < 0 && oldval != ULLONG_MAX &&
+ newval <= oldval)
goto subdevs;
rc = write_file(real_path, buf);
return rc;
}
+/*
+ * Rename the filesystem on an ldiskfs target.
+ *
+ * The device label is changed first through ldiskfs_label_lustre(). The
+ * device (or its loop device when MO_IS_LOOP is set) is then mounted on a
+ * temporary directory so that lustre_rename_fsname() can update the files
+ * on it, and finally unmounted again and the directory removed.
+ *
+ * \param mop		target options; mo_ldd, mo_device, mo_loopdev and
+ *			mo_flags are read here
+ * \param oldname	previous filesystem name, handed on unchanged to
+ *			lustre_rename_fsname()
+ *
+ * \retval		0 on success
+ * \retval		errno (or the failing call's own return value when
+ *			errno is 0) if relabelling or mounting fails
+ * \retval		errno or EINVAL if the temporary mount point cannot
+ *			be created
+ * \retval		otherwise the return value of lustre_rename_fsname()
+ */
+int ldiskfs_rename_fsname(struct mkfs_opts *mop, const char *oldname)
+{
+	struct mount_opts opts;
+	struct lustre_disk_data *ldd = &mop->mo_ldd;
+	char mntpt[] = "/tmp/mntXXXXXX";
+	char *dev;
+	int ret;
+
+	/* Change the filesystem label. Zero the whole structure first so
+	 * that no field other than the two assigned below reaches
+	 * ldiskfs_label_lustre() with an indeterminate value. */
+	memset(&opts, 0, sizeof(opts));
+	opts.mo_ldd = *ldd;
+	opts.mo_source = mop->mo_device;
+	ret = ldiskfs_label_lustre(&opts);
+	if (ret) {
+		if (errno != 0)
+			ret = errno;
+		fprintf(stderr, "Can't change filesystem label: %s\n",
+			strerror(ret));
+		return ret;
+	}
+
+	/* Mount this device temporarily in order to write these files */
+	if (mkdtemp(mntpt) == NULL) {
+		if (errno != 0)
+			ret = errno;
+		else
+			ret = EINVAL;
+		fprintf(stderr, "Can't create temp mount point %s: %s\n",
+			mntpt, strerror(ret));
+		return ret;
+	}
+
+#ifdef HAVE_SELINUX
+	/*
+	 * Append file context to mount options if SE Linux is enabled
+	 */
+	if (is_selinux_enabled() > 0)
+		append_context_for_mount(mntpt, mop);
+#endif
+
+	if (mop->mo_flags & MO_IS_LOOP)
+		dev = mop->mo_loopdev;
+	else
+		dev = mop->mo_device;
+	ret = mount(dev, mntpt, MT_STR(ldd), 0, ldd->ldd_mount_opts);
+	if (ret) {
+		/* Save errno before any stdio call can overwrite it. */
+		if (errno != 0)
+			ret = errno;
+		fprintf(stderr, "Unable to mount %s: %s\n",
+			dev, strerror(ret));
+		if (ret == ENODEV)
+			fprintf(stderr, "Is the %s module available?\n",
+				MT_STR(ldd));
+		goto out_rmdir;
+	}
+
+	ret = lustre_rename_fsname(mop, mntpt, oldname);
+	/* Report a failed unmount instead of ignoring it silently; the
+	 * result of the rename itself is still what gets returned. */
+	if (umount(mntpt) != 0)
+		fprintf(stderr, "Unable to unmount %s: %s\n",
+			mntpt, strerror(errno));
+
+out_rmdir:
+	rmdir(mntpt);
+	return ret;
+}
+
/* Enable quota accounting */
int ldiskfs_enable_quota(struct mkfs_opts *mop)
{