* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2013, Intel Corporation.
+ * Copyright (c) 2012, 2015, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <limits.h>
#include <ctype.h>
-/* libcfs.h is not really needed here, but on SLES10/PPC, fs.h includes idr.h
- * which requires BITS_PER_LONG to be defined */
-#include <libcfs/libcfs.h>
#ifndef BLKGETSIZE64
#include <linux/fs.h> /* for BLKGETSIZE64 */
#endif
#define DUMMY_FILE_NAME_LEN 25
#define EXT3_DIRENT_SIZE DUMMY_FILE_NAME_LEN
+static void append_unique(char *buf, char *prefix, char *key, char *val,
+ size_t maxbuflen);
+
/*
- * Concatenate context of the temporary mount point iff selinux is enabled
+ * Concatenate context of the temporary mount point if selinux is enabled
*/
#ifdef HAVE_SELINUX
static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop)
}
if (fcontext != NULL) {
- strcat(mop->mo_ldd.ldd_mount_opts, ",context=");
- strcat(mop->mo_ldd.ldd_mount_opts, fcontext);
+ append_unique(mop->mo_ldd.ldd_mount_opts,
+ ",", "context", fcontext,
+ sizeof(mop->mo_ldd.ldd_mount_opts));
freecon(fcontext);
}
}
dev = mop->mo_loopdev;
ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0,
- mop->mo_ldd.ldd_mount_opts);
+ (mop->mo_mountopts == NULL) ?
+ "errors=remount-ro" : mop->mo_mountopts);
if (ret) {
fprintf(stderr, "%s: Unable to mount %s: %s\n",
progname, dev, strerror(errno));
strscat(buf, key, maxbuflen);
if (val != NULL) {
- strscat(buf, "=", maxbuflen);
+ strscat(buf, "=\"", maxbuflen);
strscat(buf, val, maxbuflen);
+ strscat(buf, "\"", maxbuflen);
}
}
}
append_unique(anchor, ",", "flex_bg", NULL, maxbuflen);
- if (IS_OST(&mop->mo_ldd)) {
+ if (IS_OST(&mop->mo_ldd) &&
+ strstr(mop->mo_mkfsopts, "-G") == NULL) {
snprintf(tmp_buf, sizeof(tmp_buf), " -G %u",
(1 << 20) / L_BLOCK_SIZE);
strscat(anchor, tmp_buf, maxbuflen);
}
if (mop->mo_device_kb != 0) {
- if (mop->mo_device_kb < 8096) {
+ if (mop->mo_device_kb < 32384) {
fprintf(stderr, "%s: size of filesystem must be larger "
- "than 8MB, but is set to %lldKB\n",
+ "than 32MB, but is set to %lldKB\n",
progname, (long long)mop->mo_device_kb);
return EINVAL;
}
}
}
- /* Inode size (for extended attributes). The LOV EA size is
- * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data),
- * and we want some margin above that for ACLs, other EAs... */
+ /* Inode size includes:
+ * ldiskfs inode size: 156
+ * extended attributes size, including:
+ * ext4_xattr_header: 32
+ * LOV EA size: 32(lov_mds_md) +
+ * stripes * 24(lov_ost_data) +
+ * 16(xattr_entry) + 3(lov)
+ * LMA EA size: 24(lustre_mdt_attrs) +
+ * 16(xattr_entry) + 3(lma)
+ * link EA size: 24(link_ea_header) + 18(link_ea_entry) +
+ * (filename) + 16(xattr_entry) + 4(link)
+ * and some margin for 4-byte alignment, ACLs and other EAs.
+ *
+ * If we say the average filename length is about 32 bytes,
+ * the calculation looks like:
+ * 156 + 32 + (32+24*N+19) + (24+19) + (24+18+~32+20) + other <=
+ * 512*2^m, {m=0,1,2,3}
+ */
if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
if (IS_MDT(&mop->mo_ldd)) {
- if (mop->mo_stripe_count > 72)
+ if (mop->mo_stripe_count > 69)
inode_size = 512; /* bz 7241 */
/* see also "-i" below for EA blocks */
- else if (mop->mo_stripe_count > 32)
+ else if (mop->mo_stripe_count > 26)
inode_size = 2048;
- else if (mop->mo_stripe_count > 10)
+ else if (mop->mo_stripe_count > 5)
inode_size = 1024;
else
inode_size = 512;
if (IS_MDT(&mop->mo_ldd)) {
bytes_per_inode = inode_size + 1536;
- if (mop->mo_stripe_count > 72) {
+ if (mop->mo_stripe_count > 69) {
int extra = mop->mo_stripe_count * 24;
extra = ((extra - 1) | 4095) + 1;
bytes_per_inode += extra;
vprint("formatting backing filesystem %s on %s\n",
MT_STR(&mop->mo_ldd), dev);
- vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
- vprint("\t4k blocks "LPU64"\n", block_count);
+ vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
+ vprint("\t4k blocks %ju\n", (uintmax_t)block_count);
vprint("\toptions %s\n", mop->mo_mkfsopts);
/* mkfs_cmd's trailing space is important! */
strscat(mkfs_cmd, " ", sizeof(mkfs_cmd));
strscat(mkfs_cmd, dev, sizeof(mkfs_cmd));
if (block_count != 0) {
- sprintf(buf, " "LPU64, block_count);
+ snprintf(buf, sizeof(buf), " %ju",
+ (uintmax_t)block_count);
strscat(mkfs_cmd, buf, sizeof(mkfs_cmd));
}
}
int ldiskfs_prepare_lustre(struct mkfs_opts *mop,
- char *default_mountopts, int default_len,
- char *always_mountopts, int always_len)
+ char *wanted_mountopts, size_t len)
{
struct lustre_disk_data *ldd = &mop->mo_ldd;
int ret;
mop->mo_flags |= MO_IS_LOOP;
}
- strscat(default_mountopts, ",errors=remount-ro", default_len);
if (IS_MDT(ldd) || IS_MGS(ldd))
- strscat(always_mountopts, ",user_xattr", always_len);
+ strscat(wanted_mountopts, ",user_xattr", len);
+
+ return 0;
+}
+
+int ldiskfs_fix_mountopts(struct mkfs_opts *mop, char *mountopts, size_t len)
+{ /* Give mountopts a default "errors=" policy if it has none; mop is not used here. */
+ if (strstr(mountopts, "errors=") == NULL) /* substring match anywhere in the option string */
+ strscat(mountopts, ",errors=remount-ro", len); /* presumably bounded by len -- confirm against strscat */
return 0;
}
if (fd == NULL)
return errno;
- /* should not ignore fgets(3)'s return value */
- if (!fgets(buf, size, fd)) {
+ if (fgets(buf, size, fd) == NULL) {
fprintf(stderr, "reading from %s: %s", path, strerror(errno));
fclose(fd);
return 1;
}
fclose(fd);
+
+ /* strip trailing newline */
+ size = strlen(buf);
+ if (buf[size - 1] == '\n')
+ buf[size - 1] = '\0';
+
return 0;
}
+/*
+ * Write the string buf to the existing file at path.
+ *
+ * The file is opened O_WRONLY without O_CREAT, so it is never created.
+ * Returns 0 on success, or the errno value of the failing open()/write().
+ * A short write is not treated as an error.
+ */
static int write_file(const char *path, const char *buf)
{
- FILE *fd;
+ int fd, rc, saved_errno = 0;
- fd = fopen(path, "w");
- if (fd == NULL)
+ fd = open(path, O_WRONLY);
+ if (fd < 0)
return errno;
- fputs(buf, fd);
- fclose(fd);
- return 0;
+ rc = write(fd, buf, strlen(buf));
+ /* Save errno now: close() below may overwrite it. */
+ if (rc < 0)
+ saved_errno = errno;
+ close(fd);
+
+ return saved_errno;
}
static int set_blockdev_scheduler(const char *path, const char *scheduler)
{
- char buf[PATH_MAX], *c;
+ char buf[PATH_MAX], *s, *e, orig_sched[50];
int rc;
/* Before setting the scheduler, we need to check to see if it's
}
/* The expected format of buf: noop anticipatory deadline [cfq] */
- c = strchr(buf, '[');
+ s = strchr(buf, '[');
+ e = strchr(buf, ']');
- /* If c is NULL, the format is not what we expect. Play it safe
- * and error out. */
- if (c == NULL) {
+ /* If the format is not what we expect, play it safe and error out. */
+ if (s == NULL || e == NULL) {
if (verbose)
fprintf(stderr, "%s: cannot parse scheduler "
"options for '%s'\n", progname, path);
return -EINVAL;
}
- if (strncmp(c+1, "noop", 4) == 0)
+ snprintf(orig_sched, e - s, "%s", s + 1);
+
+ if (strcmp(orig_sched, "noop") == 0 ||
+ strcmp(orig_sched, scheduler) == 0)
return 0;
rc = write_file(path, scheduler);
"'%s': %s\n", progname, path,
strerror(errno));
return rc;
+ } else {
+ fprintf(stderr, "%s: change scheduler of %s from %s to %s\n",
+ progname, path, orig_sched, scheduler);
}
return rc;
snprintf(buf, sizeof(buf), "%d",
mop->mo_md_stripe_cache_size);
rc = write_file(real_path, buf);
- if (rc && verbose)
+ if (rc != 0 && verbose)
fprintf(stderr, "warning: opening %s: %s\n",
real_path, strerror(errno));
}
real_path, strerror(errno));
/* No MAX_HW_SECTORS_KB_PATH isn't necessary an
* error for some device. */
- rc = 0;
+ goto subdevs;
}
if (strlen(buf) - 1 > 0) {
+ char oldbuf[32] = "", *end = NULL;
+ unsigned long long oldval, newval;
+
snprintf(real_path, sizeof(real_path), "%s/%s", path,
MAX_SECTORS_KB_PATH);
+ rc = read_file(real_path, oldbuf, sizeof(oldbuf));
+ /* Only set new parameter if different from the old one. */
+ if (rc != 0 || strcmp(oldbuf, buf) == 0) {
+ /* A missing MAX_SECTORS_KB_PATH isn't necessarily an
+ * error for some devices. */
+ goto subdevs;
+ }
+
+ newval = strtoull(buf, &end, 0);
+ if (newval == 0 || newval == ULLONG_MAX || end == buf)
+ goto subdevs;
+
+ /* Don't increase IO request size limit past 16MB. It is about
+ * PTLRPC_MAX_BRW_SIZE, but that isn't in a public header.
+ * Note that even though the block layer allows larger values,
+ * setting max_sectors_kb = 32768 causes crashes (LU-6974). */
+ if (newval > 16 * 1024) {
+ newval = 16 * 1024;
+ snprintf(buf, sizeof(buf), "%llu", newval);
+ }
+
+ oldval = strtoull(oldbuf, &end, 0);
+ /* Don't shrink the current limit. */
+ if (oldval != ULLONG_MAX && newval <= oldval)
+ goto subdevs;
+
rc = write_file(real_path, buf);
- if (rc) {
+ if (rc != 0) {
if (verbose)
fprintf(stderr, "warning: writing to %s: %s\n",
real_path, strerror(errno));
/* No MAX_SECTORS_KB_PATH isn't necessary an
* error for some device. */
- rc = 0;
+ goto subdevs;
}
+ fprintf(stderr, "%s: increased %s from %s to %s\n",
+ progname, real_path, oldbuf, buf);
}
+subdevs:
/* Purposely ignore errors reported from set_blockdev_scheduler.
* The worst that will happen is a block device with an "incorrect"
* scheduler. */