*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2013, Intel Corporation.
+ * Copyright (c) 2012, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
-#include "mount_utils.h"
-#include <stdlib.h>
+#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
-#include <stdarg.h>
#include <mntent.h>
#include <glob.h>
#include <limits.h>
#include <ctype.h>
-#ifdef __linux__
-/* libcfs.h is not really needed here, but on SLES10/PPC, fs.h includes idr.h
- * which requires BITS_PER_LONG to be defined */
-#include <libcfs/libcfs.h>
#ifndef BLKGETSIZE64
#include <linux/fs.h> /* for BLKGETSIZE64 */
#endif
#include <linux/version.h>
-#endif
#include <lustre_disk.h>
#include <lustre_param.h>
#include <lnet/lnetctl.h>
#include <selinux/selinux.h>
#endif
+#include "mount_utils.h"
+
#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
#define SCHEDULER_PATH "queue/scheduler"
#define DUMMY_FILE_NAME_LEN 25
#define EXT3_DIRENT_SIZE DUMMY_FILE_NAME_LEN
+static void append_unique(char *buf, char *prefix, char *key, char *val,
+ size_t maxbuflen);
+
/*
- * Concatenate context of the temporary mount point iff selinux is enabled
+ * Concatenate context of the temporary mount point if selinux is enabled
*/
#ifdef HAVE_SELINUX
static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop)
}
if (fcontext != NULL) {
- strcat(mop->mo_ldd.ldd_mount_opts, ",context=");
- strcat(mop->mo_ldd.ldd_mount_opts, fcontext);
+ append_unique(mop->mo_ldd.ldd_mount_opts,
+ ",", "context", fcontext,
+ sizeof(mop->mo_ldd.ldd_mount_opts));
freecon(fcontext);
}
}
char enabled_features[4096] = "";
int ret = 1;
- snprintf(cmd, sizeof(cmd), "%s -R features %s 2>&1",
+ snprintf(cmd, sizeof(cmd), "%s -c -R features %s 2>&1",
DEBUGFS, devpath);
/* Using popen() instead of run_command() since debugfs does
dev = mop->mo_loopdev;
ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0,
- mop->mo_ldd.ldd_mount_opts);
+ (mop->mo_mountopts == NULL) ?
+ "errors=remount-ro" : mop->mo_mountopts);
if (ret) {
fprintf(stderr, "%s: Unable to mount %s: %s\n",
progname, dev, strerror(errno));
fclose(filep);
goto out_umnt;
}
+ fsync(filep->_fileno);
fclose(filep);
out_umnt:
return ret;
}
+/* Placeholder: performs no action on the target and always returns 0. */
+int ldiskfs_erase_ldd(struct mkfs_opts *mop, char *param)
+{
+	(void)mop;
+	(void)param;
+
+	return 0;
+}
+
+/* Print the ldd_params string held in mop->mo_ldd to stdout. */
+void ldiskfs_print_ldd_params(struct mkfs_opts *mop)
+{
+	const struct lustre_disk_data *ldd = &mop->mo_ldd;
+
+	printf("Parameters:%s\n", ldd->ldd_params);
+}
/* Display the need for the latest e2fsprogs to be installed. make_backfs
* indicates if the caller is make_lustre_backfs() or not. */
E2FSPROGS, feature);
#if !(HAVE_LDISKFSPROGS)
fprintf(stderr, "Please install the latest version of e2fsprogs from\n"
- "http://downloads.whamcloud.com/public/e2fsprogs/latest/\n"
+ "https://downloads.hpdd.intel.com/public/e2fsprogs/latest/\n"
"to enable this feature.\n");
#endif
if (make_backfs)
strscat(buf, key, maxbuflen);
if (val != NULL) {
- strscat(buf, "=", maxbuflen);
+ strscat(buf, "=\"", maxbuflen);
strscat(buf, val, maxbuflen);
+ strscat(buf, "\"", maxbuflen);
}
}
}
{
if (IS_OST(&mop->mo_ldd)) {
append_unique(anchor, user_spec ? "," : " -O ",
- "extents", NULL, sizeof(mop->mo_mkfsopts));
+ "extents", NULL, maxbuflen);
append_unique(anchor, ",", "uninit_bg", NULL, maxbuflen);
} else if (IS_MDT(&mop->mo_ldd)) {
append_unique(anchor, user_spec ? "," : " -O ",
append_unique(anchor, ",", "flex_bg", NULL, maxbuflen);
- if (IS_OST(&mop->mo_ldd)) {
+ if (IS_OST(&mop->mo_ldd) &&
+ strstr(mop->mo_mkfsopts, "-G") == NULL) {
snprintf(tmp_buf, sizeof(tmp_buf), " -G %u",
(1 << 20) / L_BLOCK_SIZE);
strscat(anchor, tmp_buf, maxbuflen);
*/
static char *moveopts_to_end(char *start)
{
+ size_t len;
char save[512];
char *end, *idx;
while (*end != ' ' && *end != '\0')
++end;
+ len = end - start;
+ if (len >= sizeof(save))
+ len = sizeof(save) - 1;
+
/* save options */
- strncpy(save, start, end - start);
- save[end - start] = '\0';
+ strncpy(save, start, len);
+ save[len] = '\0';
/* move remaining options up front */
if (*end)
}
if (mop->mo_device_kb != 0) {
- if (mop->mo_device_kb < 8096) {
+ if (mop->mo_device_kb < 32384) {
fprintf(stderr, "%s: size of filesystem must be larger "
- "than 8MB, but is set to %lldKB\n",
+ "than 32MB, but is set to %lldKB\n",
progname, (long long)mop->mo_device_kb);
return EINVAL;
}
}
}
- /* Inode size (for extended attributes). The LOV EA size is
- * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data),
- * and we want some margin above that for ACLs, other EAs... */
+ /* Inode size includes:
+ * ldiskfs inode size: 156
+ * extended attributes size, including:
+ * ext4_xattr_header: 32
+ * LOV EA size: 32(lov_mds_md) +
+ * stripes * 24(lov_ost_data) +
+ * 16(xattr_entry) + 3(lov)
+ * LMA EA size: 24(lustre_mdt_attrs) +
+ * 16(xattr_entry) + 3(lma)
+ * link EA size: 24(link_ea_header) + 18(link_ea_entry) +
+ * (filename) + 16(xattr_entry) + 4(link)
+ * and some margin for 4-byte alignment, ACLs and other EAs.
+ *
+ * If we say the average filename length is about 32 bytes,
+ * the calculation looks like:
+ * 156 + 32 + (32+24*N+19) + (24+19) + (24+18+~32+20) + other <=
+ * 512*2^m, {m=0,1,2,3}
+ */
if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
if (IS_MDT(&mop->mo_ldd)) {
- if (mop->mo_stripe_count > 72)
+ if (mop->mo_stripe_count > 69)
inode_size = 512; /* bz 7241 */
/* see also "-i" below for EA blocks */
- else if (mop->mo_stripe_count > 32)
+ else if (mop->mo_stripe_count > 26)
inode_size = 2048;
- else if (mop->mo_stripe_count > 10)
+ else if (mop->mo_stripe_count > 5)
inode_size = 1024;
else
inode_size = 512;
if (IS_MDT(&mop->mo_ldd)) {
bytes_per_inode = inode_size + 1536;
- if (mop->mo_stripe_count > 72) {
+ if (mop->mo_stripe_count > 69) {
int extra = mop->mo_stripe_count * 24;
extra = ((extra - 1) | 4095) + 1;
bytes_per_inode += extra;
* descriptor blocks, but leave one block for the superblock.
* Only useful for filesystems with < 2^32 blocks due to resize
* limitations. */
- if (IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 &&
+ if (strstr(mop->mo_mkfsopts, "meta_bg") == NULL &&
+ IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 &&
mop->mo_device_kb * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) {
unsigned group_blocks = L_BLOCK_SIZE * 8;
unsigned desc_per_block = L_BLOCK_SIZE / 32;
vprint("formatting backing filesystem %s on %s\n",
MT_STR(&mop->mo_ldd), dev);
- vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
- vprint("\t4k blocks "LPU64"\n", block_count);
+ vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
+ vprint("\t4k blocks %ju\n", (uintmax_t)block_count);
vprint("\toptions %s\n", mop->mo_mkfsopts);
/* mkfs_cmd's trailing space is important! */
strscat(mkfs_cmd, " ", sizeof(mkfs_cmd));
strscat(mkfs_cmd, dev, sizeof(mkfs_cmd));
if (block_count != 0) {
- sprintf(buf, " "LPU64, block_count);
+ snprintf(buf, sizeof(buf), " %ju",
+ (uintmax_t)block_count);
strscat(mkfs_cmd, buf, sizeof(mkfs_cmd));
}
}
int ldiskfs_prepare_lustre(struct mkfs_opts *mop,
- char *default_mountopts, int default_len,
- char *always_mountopts, int always_len)
+ char *wanted_mountopts, size_t len)
{
struct lustre_disk_data *ldd = &mop->mo_ldd;
int ret;
mop->mo_flags |= MO_IS_LOOP;
}
- strscat(default_mountopts, ",errors=remount-ro", default_len);
if (IS_MDT(ldd) || IS_MGS(ldd))
- strscat(always_mountopts, ",user_xattr", always_len);
+ strscat(wanted_mountopts, ",user_xattr", len);
+
+ return 0;
+}
+
+/*
+ * Append ",errors=remount-ro" to mountopts unless the string already
+ * contains an "errors=" setting.  len is passed to strscat() as the
+ * buffer limit; mop is not used.  Always returns 0.
+ */
+int ldiskfs_fix_mountopts(struct mkfs_opts *mop, char *mountopts, size_t len)
+{
+	const char *errors_opt = strstr(mountopts, "errors=");
+
+	if (errors_opt == NULL)
+		strscat(mountopts, ",errors=remount-ro", len);
 return 0;
}
if (fd == NULL)
return errno;
- /* should not ignore fgets(3)'s return value */
- if (!fgets(buf, size, fd)) {
+ if (fgets(buf, size, fd) == NULL) {
fprintf(stderr, "reading from %s: %s", path, strerror(errno));
fclose(fd);
return 1;
}
fclose(fd);
+
+ /* strip trailing newline */
+ size = strlen(buf);
+ if (buf[size - 1] == '\n')
+ buf[size - 1] = '\0';
+
return 0;
}
+/*
+ * Write the string buf to the existing file at path (opened O_WRONLY,
+ * never created).  Returns 0 on success, or the errno of the failing
+ * open()/write() call.
+ */
static int write_file(const char *path, const char *buf)
{
- FILE *fd;
+	ssize_t rc;
+	int fd, err = 0;
- fd = fopen(path, "w");
- if (fd == NULL)
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
 return errno;
- fputs(buf, fd);
- fclose(fd);
- return 0;
+	rc = write(fd, buf, strlen(buf));
+	/* Capture errno before close(), which may overwrite it. */
+	if (rc < 0)
+		err = errno;
+	close(fd);
+
+	return err;
}
static int set_blockdev_scheduler(const char *path, const char *scheduler)
{
- char buf[PATH_MAX], *c;
+ char buf[PATH_MAX], *s, *e, orig_sched[50];
int rc;
/* Before setting the scheduler, we need to check to see if it's
}
/* The expected format of buf: noop anticipatory deadline [cfq] */
- c = strchr(buf, '[');
+ s = strchr(buf, '[');
+ e = strchr(buf, ']');
- /* If c is NULL, the format is not what we expect. Play it safe
- * and error out. */
- if (c == NULL) {
+ /* If the format is not what we expect, play it safe and error out. */
+ if (s == NULL || e == NULL) {
if (verbose)
fprintf(stderr, "%s: cannot parse scheduler "
"options for '%s'\n", progname, path);
return -EINVAL;
}
- if (strncmp(c+1, "noop", 4) == 0)
+ snprintf(orig_sched, e - s, "%s", s + 1);
+
+ if (strcmp(orig_sched, "noop") == 0 ||
+ strcmp(orig_sched, scheduler) == 0)
return 0;
rc = write_file(path, scheduler);
"'%s': %s\n", progname, path,
strerror(errno));
return rc;
+ } else {
+ fprintf(stderr, "%s: change scheduler of %s from %s to %s\n",
+ progname, path, orig_sched, scheduler);
}
return rc;
snprintf(buf, sizeof(buf), "%d",
mop->mo_md_stripe_cache_size);
rc = write_file(real_path, buf);
- if (rc && verbose)
+ if (rc != 0 && verbose)
fprintf(stderr, "warning: opening %s: %s\n",
real_path, strerror(errno));
}
return rc;
}
- snprintf(real_path, sizeof(real_path), "%s/%s", path,
- MAX_HW_SECTORS_KB_PATH);
- rc = read_file(real_path, buf, sizeof(buf));
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: opening %s: %s\n",
- real_path, strerror(errno));
- /* No MAX_HW_SECTORS_KB_PATH isn't necessary an
- * error for some device. */
- rc = 0;
+ if (mop->mo_max_sectors_kb >= 0) {
+ snprintf(buf, sizeof(buf), "%d", mop->mo_max_sectors_kb);
+ } else {
+ snprintf(real_path, sizeof(real_path), "%s/%s", path,
+ MAX_HW_SECTORS_KB_PATH);
+ rc = read_file(real_path, buf, sizeof(buf));
+ if (rc) {
+ if (verbose)
+ fprintf(stderr, "warning: opening %s: %s\n",
+ real_path, strerror(errno));
+ /* A missing MAX_HW_SECTORS_KB_PATH isn't necessarily an
+ * error for some devices. */
+ goto subdevs;
+ }
}
if (strlen(buf) - 1 > 0) {
+ char oldbuf[32] = "", *end = NULL;
+ unsigned long long oldval, newval;
+
snprintf(real_path, sizeof(real_path), "%s/%s", path,
MAX_SECTORS_KB_PATH);
+ rc = read_file(real_path, oldbuf, sizeof(oldbuf));
+ /* Only set new parameter if different from the old one. */
+ if (rc != 0 || strcmp(oldbuf, buf) == 0) {
+ /* A missing MAX_SECTORS_KB_PATH isn't necessarily an
+ * error for some devices. */
+ goto subdevs;
+ }
+
+ newval = strtoull(buf, &end, 0);
+ if (newval == 0 || newval == ULLONG_MAX || end == buf)
+ goto subdevs;
+
+ /* Don't increase IO request size limit past 16MB. It is about
+ * PTLRPC_MAX_BRW_SIZE, but that isn't in a public header.
+ * Note that even though the block layer allows larger values,
+ * setting max_sectors_kb = 32768 causes crashes (LU-6974). */
+ if (mop->mo_max_sectors_kb < 0 && newval > 16 * 1024) {
+ newval = 16 * 1024;
+ snprintf(buf, sizeof(buf), "%llu", newval);
+ }
+
+ oldval = strtoull(oldbuf, &end, 0);
+ /* Don't shrink the current limit. */
+ if (mop->mo_max_sectors_kb < 0 && oldval != ULLONG_MAX &&
+ newval <= oldval)
+ goto subdevs;
+
rc = write_file(real_path, buf);
- if (rc) {
+ if (rc != 0) {
if (verbose)
fprintf(stderr, "warning: writing to %s: %s\n",
real_path, strerror(errno));
/* No MAX_SECTORS_KB_PATH isn't necessary an
* error for some device. */
- rc = 0;
+ goto subdevs;
}
+ fprintf(stderr, "%s: increased %s from %s to %s\n",
+ progname, real_path, oldbuf, buf);
}
+subdevs:
/* Purposely ignore errors reported from set_blockdev_scheduler.
* The worst that will happen is a block device with an "incorrect"
* scheduler. */
return rc;
}
+/*
+ * Rename the filesystem recorded on an ldiskfs target.
+ *
+ * The device is first relabelled through ldiskfs_label_lustre().  It is then
+ * mounted on a temporary directory so that lustre_rename_fsname() can work
+ * on the mounted tree, and finally unmounted and the directory removed.
+ *
+ * mop:     target description; mo_ldd, mo_device, mo_loopdev and mo_flags
+ *          are read here
+ * oldname: passed through unchanged to lustre_rename_fsname()
+ *
+ * Returns 0 on success.  On failure returns errno from the failing step
+ * (or the callee's own return value when errno is 0), or the result of
+ * lustre_rename_fsname().
+ */
+int ldiskfs_rename_fsname(struct mkfs_opts *mop, const char *oldname)
+{
+	struct mount_opts opts;
+	struct lustre_disk_data *ldd = &mop->mo_ldd;
+	char mntpt[] = "/tmp/mntXXXXXX";
+	char *dev;
+	int ret;
+
+	/* Change the filesystem label.  Zero the whole structure first so
+	 * ldiskfs_label_lustre() never sees indeterminate fields; only
+	 * mo_ldd and mo_source are filled in here. */
+	memset(&opts, 0, sizeof(opts));
+	opts.mo_ldd = *ldd;
+	opts.mo_source = mop->mo_device;
+	ret = ldiskfs_label_lustre(&opts);
+	if (ret) {
+		if (errno != 0)
+			ret = errno;
+		fprintf(stderr, "Can't change filesystem label: %s\n",
+			strerror(ret));
+		return ret;
+	}
+
+	/* Mount this device temporarily in order to write these files */
+	if (mkdtemp(mntpt) == NULL) {
+		if (errno != 0)
+			ret = errno;
+		else
+			ret = EINVAL;
+		fprintf(stderr, "Can't create temp mount point %s: %s\n",
+			mntpt, strerror(ret));
+		return ret;
+	}
+
+#ifdef HAVE_SELINUX
+	/*
+	 * Append file context to mount options if SE Linux is enabled
+	 */
+	if (is_selinux_enabled() > 0)
+		append_context_for_mount(mntpt, mop);
+#endif
+
+	/* Loop-backed targets are mounted through their loop device. */
+	if (mop->mo_flags & MO_IS_LOOP)
+		dev = mop->mo_loopdev;
+	else
+		dev = mop->mo_device;
+	ret = mount(dev, mntpt, MT_STR(ldd), 0, ldd->ldd_mount_opts);
+	if (ret) {
+		if (errno != 0)
+			ret = errno;
+		fprintf(stderr, "Unable to mount %s: %s\n",
+			dev, strerror(ret));
+		if (ret == ENODEV)
+			fprintf(stderr, "Is the %s module available?\n",
+				MT_STR(ldd));
+		goto out_rmdir;
+	}
+
+	ret = lustre_rename_fsname(mop, mntpt, oldname);
+	umount(mntpt);
+
+out_rmdir:
+	rmdir(mntpt);
+	return ret;
+}
+
/* Enable quota accounting */
int ldiskfs_enable_quota(struct mkfs_opts *mop)
{