X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Flibmount_utils_ldiskfs.c;h=5907213061e83915c9cd144ced5c5622dbfdd553;hb=14b8276e06d6f4e3bfe785df1165458555e406f3;hp=c1f79ae2df1fdbeb6a207007e05419ec6caeca33;hpb=2f8d7b4679de3fa467040aa61733f262714e39c9;p=fs%2Flustre-release.git

diff --git a/lustre/utils/libmount_utils_ldiskfs.c b/lustre/utils/libmount_utils_ldiskfs.c
index c1f79ae..5907213 100644
--- a/lustre/utils/libmount_utils_ldiskfs.c
+++ b/lustre/utils/libmount_utils_ldiskfs.c
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/utils/mount_utils_ldiskfs.c
  *
@@ -64,6 +63,8 @@
 #include <limits.h>
 #include <ctype.h>
 
+#include <ext2fs/ext2fs.h>
+
 #ifndef BLKGETSIZE64
 #include <linux/fs.h> /* for BLKGETSIZE64 */
 #endif
@@ -74,10 +75,6 @@
 #include <linux/lustre/lustre_ver.h>
 #include <libcfs/util/string.h>
 
-#ifdef HAVE_SELINUX
-#include <selinux/selinux.h>
-#endif
-
 #include "mount_utils.h"
 
 #define MAX_HW_SECTORS_KB_PATH	"queue/max_hw_sectors_kb"
@@ -89,40 +86,19 @@
 
 extern char *progname;
 
-#define L_BLOCK_SIZE 4096
+static ext2_filsys backfs;
+static int open_flags = EXT2_FLAG_64BITS | EXT2_FLAG_SKIP_MMP |
+			EXT2_FLAG_IGNORE_SB_ERRORS | EXT2_FLAG_SUPER_ONLY;
+
 /* keep it less than LL_FID_NAMELEN */
 #define DUMMY_FILE_NAME_LEN             25
 #define EXT3_DIRENT_SIZE                DUMMY_FILE_NAME_LEN
 
 static void append_unique(char *buf, char *prefix, char *key, char *val,
 			  size_t maxbuflen);
-static int is_e2fsprogs_feature_supp(const char *feature);
+static bool is_e2fsprogs_feature_supp(const char *feature);
 static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs);
 
-/*
- * Concatenate context of the temporary mount point if selinux is enabled
- */
-#ifdef HAVE_SELINUX
-static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop)
-{
-	security_context_t fcontext;
-
-	if (getfilecon(mntpt, &fcontext) < 0) {
-		/* Continuing with default behaviour */
-		fprintf(stderr, "%s: Get file context failed : %s\n",
-			progname, strerror(errno));
-		return;
-	}
-
-	if (fcontext != NULL) {
-		append_unique(mop->mo_ldd.ldd_mount_opts,
-			      ",", "context", fcontext,
-			      sizeof(mop->mo_ldd.ldd_mount_opts));
-		freecon(fcontext);
-	}
-}
-#endif
-
 /* Determine if a device is a block device (as opposed to a file) */
 static int is_block(char *devname)
 {
@@ -161,33 +137,6 @@ out:
 	return ret;
 }
 
-static int is_feature_enabled(const char *feature, const char *devpath)
-{
-	char cmd[PATH_MAX];
-	FILE *fp;
-	char enabled_features[4096] = "";
-	int ret = 1;
-
-	snprintf(cmd, sizeof(cmd), "%s -c -R features %s 2>&1",
-		 DEBUGFS, devpath);
-
-	/* Using popen() instead of run_command() since debugfs does
-	 * not return proper error code if command is not supported */
-	fp = popen(cmd, "r");
-	if (!fp) {
-		fprintf(stderr, "%s: %s\n", progname, strerror(errno));
-		return 0;
-	}
-
-	ret = fread(enabled_features, 1, sizeof(enabled_features) - 1, fp);
-	enabled_features[ret] = '\0';
-	pclose(fp);
-
-	if (strstr(enabled_features, feature))
-		return 1;
-	return 0;
-}
-
 /* Write the server config files */
 int ldiskfs_write_ldd(struct mkfs_opts *mop)
 {
@@ -205,33 +154,37 @@ int ldiskfs_write_ldd(struct mkfs_opts *mop)
 		return errno;
 	}
 
-	/*
-	 * Append file context to mount options if SE Linux is enabled
-	 */
-	#ifdef HAVE_SELINUX
-	if (is_selinux_enabled() > 0)
-		append_context_for_mount(mntpt, mop);
-	#endif
-
 	dev = mop->mo_device;
 	if (mop->mo_flags & MO_IS_LOOP)
 		dev = mop->mo_loopdev;
 
 	/* Multiple mount protection enabled if failover node specified */
-	if (mop->mo_flags & MO_FAILOVER &&
-	    !is_feature_enabled("mmp", dev)) {
-		if (is_e2fsprogs_feature_supp("-O mmp") == 0) {
-			char *command = filepnm;
-
-			snprintf(command, sizeof(filepnm),
-				 TUNE2FS" -O mmp '%s' >/dev/null 2>&1", dev);
-			ret = run_command(command, sizeof(filepnm));
-			if (ret)
-				fprintf(stderr,
-					"%s: Unable to set 'mmp' on %s: %d\n",
-					progname, dev, ret);
-		} else
-			disp_old_e2fsprogs_msg("mmp", 1);
+	if (mop->mo_flags & MO_FAILOVER) {
+		if (!backfs)
+			ext2fs_open(dev, open_flags, 0, 0,
+				    unix_io_manager, &backfs);
+		if (!backfs || !ext2fs_has_feature_mmp(backfs->super)) {
+			if (is_e2fsprogs_feature_supp("-O mmp")) {
+				char *command = filepnm;
+
+				snprintf(command, sizeof(filepnm),
+					 TUNE2FS" -O mmp '%s' >/dev/null 2>&1",
+					 dev);
+				ret = run_command(command, sizeof(filepnm));
+				if (ret)
+					fprintf(stderr,
+						"%s: Unable to set 'mmp' "
+						"on %s: %d\n",
+						progname, dev, ret);
+			} else {
+				disp_old_e2fsprogs_msg("mmp", 1);
+			}
+			/* drop handle so a stale superblock is never reused */
+			if (backfs) {
+				ext2fs_close(backfs);
+				backfs = NULL;
+			}
+		}
 	}
 
 	ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0,
@@ -307,51 +260,37 @@ static int readcmd(char *cmd, char *buf, int len)
 
 int ldiskfs_read_ldd(char *dev, struct lustre_disk_data *mo_ldd)
 {
-	char tmpdir[] = "/tmp/dirXXXXXX";
+	errcode_t retval;
+	ext2_ino_t ino;
+	ext2_file_t file;
+	unsigned int got;
 	char cmd[PATH_MAX];
-	char filepnm[128];
-	FILE *filep;
 	int ret = 0;
-	int cmdsz = sizeof(cmd);
-
-	/* Make a temporary directory to hold Lustre data files. */
-	if (!mkdtemp(tmpdir)) {
-		fprintf(stderr, "%s: Can't create temporary directory %s: %s\n",
-			progname, tmpdir, strerror(errno));
-		return errno;
-	}
-
-	/* TODO: it's worth observing the get_mountdata() function that is
-	   in mount_utils.c for getting the mountdata out of the
-	   filesystem */
 
-	/* Construct debugfs command line. */
-	snprintf(cmd, cmdsz, "%s -c -R 'dump /%s %s/mountdata' '%s'",
-		 DEBUGFS, MOUNT_DATA_FILE, tmpdir, dev);
-
-	ret = run_command(cmd, cmdsz);
-	if (ret)
-		verrprint("%s: Unable to dump %s dir (%d)\n",
-			  progname, MOUNT_CONFIGS_DIR, ret);
-
-	sprintf(filepnm, "%s/mountdata", tmpdir);
-	filep = fopen(filepnm, "r");
-	if (filep) {
-		size_t num_read;
-		vprint("Reading %s\n", MOUNT_DATA_FILE);
-		num_read = fread(mo_ldd, sizeof(*mo_ldd), 1, filep);
-		if (num_read < 1 && ferror(filep)) {
-			fprintf(stderr, "%s: Unable to read from file %s: %s\n",
-				progname, filepnm, strerror(errno));
+	if (!backfs) {
+		retval = ext2fs_open(dev, open_flags, 0, 0,
+				     unix_io_manager, &backfs);
+		if (retval) {
+			fprintf(stderr, "Unable to open fs on %s\n", dev);
+			goto read_label;
 		}
-		fclose(filep);
 	}
-
-	snprintf(cmd, cmdsz, "rm -rf %s", tmpdir);
-	run_command(cmd, cmdsz);
-	if (ret)
-		verrprint("Failed to read old data (%d)\n", ret);
-
+	retval = ext2fs_namei(backfs, EXT2_ROOT_INO, EXT2_ROOT_INO,
+			      MOUNT_DATA_FILE, &ino);
+	if (retval) {
+		fprintf(stderr, "Error while looking up %s\n", MOUNT_DATA_FILE);
+		goto read_label;
+	}
+	retval = ext2fs_file_open(backfs, ino, 0, &file);
+	if (retval) {
+		fprintf(stderr, "Error while opening file %s\n",
+			MOUNT_DATA_FILE);
+		goto read_label;
+	}
+	retval = ext2fs_file_read(file, mo_ldd, sizeof(*mo_ldd), &got);
+	if (retval || got == 0)
+		fprintf(stderr, "Failed to read file %s\n", MOUNT_DATA_FILE);
+read_label:
 	/* As long as we at least have the label, we're good to go */
 	snprintf(cmd, sizeof(cmd), E2LABEL" %s", dev);
 	ret = readcmd(cmd, mo_ldd->ldd_svname, sizeof(mo_ldd->ldd_svname) - 1);
@@ -392,45 +331,28 @@ static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs)
 		"to enable this feature.\n");
 #endif
 	if (make_backfs)
-		fprintf(stderr, "Feature will not be enabled until %s"
-			"is updated and '%s -O %s %%{device}' "
-			"is run.\n\n", E2FSPROGS, TUNE2FS, feature);
+		fprintf(stderr,
+			"Feature will not be enabled until %s is updated and '%s -O %s %%{device}' is run.\n\n",
+			E2FSPROGS, TUNE2FS, feature);
 }
 
 /* Check whether the file exists in the device */
 static int file_in_dev(char *file_name, char *dev_name)
 {
-	FILE *fp;
-	char debugfs_cmd[256];
-	unsigned int inode_num;
-	int i;
-
-	/* Construct debugfs command line. */
-	snprintf(debugfs_cmd, sizeof(debugfs_cmd),
-		 "%s -c -R 'stat %s' '%s' 2>&1 | egrep '(Inode|unsupported)'",
-		 DEBUGFS, file_name, dev_name);
-
-	fp = popen(debugfs_cmd, "r");
-	if (!fp) {
-		fprintf(stderr, "%s: %s\n", progname, strerror(errno));
-		return 0;
-	}
+	ext2_ino_t ino;
+	errcode_t retval;
 
-	if (fscanf(fp, "Inode: %u", &inode_num) == 1) { /* exist */
-		pclose(fp);
-		return 1;
-	}
-	i = fread(debugfs_cmd, 1, sizeof(debugfs_cmd) - 1, fp);
-	if (i) {
-		debugfs_cmd[i] = 0;
-		fprintf(stderr, "%s", debugfs_cmd);
-		if (strstr(debugfs_cmd, "unsupported feature")) {
-			disp_old_e2fsprogs_msg("an unknown", 0);
-		}
-		pclose(fp);
-		return -1;
+	if (!backfs) {	/* reuse the file-scope handle when already open */
+		retval = ext2fs_open(dev_name, open_flags, 0, 0,
+				     unix_io_manager, &backfs);
+		if (retval)
+			return 0;	/* open failure reads as "absent" */
 	}
-	pclose(fp);
+	retval = ext2fs_namei(backfs, EXT2_ROOT_INO, EXT2_ROOT_INO,
+			      file_name, &ino);
+	if (!retval)
+		return 1;	/* name resolved from the root directory */
+
 	return 0;
 }
 
@@ -459,14 +381,14 @@ int ldiskfs_is_lustre(char *dev, unsigned *mount_type)
  * Firstly we try to use "debugfs supported_features" command to check if
  * the feature is supported. If this fails we try to set this feature with
  * mke2fs to check for its support. */
-static int is_e2fsprogs_feature_supp(const char *feature)
+static bool is_e2fsprogs_feature_supp(const char *feature)
 {
 	static char supp_features[4096] = "";
 	FILE *fp;
 	char cmd[PATH_MAX];
 	char imgname[] = "/tmp/test-img-XXXXXX";
-	int fd = -1;
-	int ret = 1;
+	int fd;
+	int ret;
 
 	if (supp_features[0] == '\0') {
 		snprintf(cmd, sizeof(cmd), "%s -c -R supported_features 2>&1",
@@ -477,20 +399,22 @@ static int is_e2fsprogs_feature_supp(const char *feature)
 		fp = popen(cmd, "r");
 		if (!fp) {
 			fprintf(stderr, "%s: %s\n", progname, strerror(errno));
-			return 0;
+		} else {
+			ret = fread(supp_features, 1,
+				    sizeof(supp_features) - 1, fp);
+			supp_features[ret] = '\0';
+			pclose(fp);
 		}
-		ret = fread(supp_features, 1, sizeof(supp_features) - 1, fp);
-		supp_features[ret] = '\0';
-		pclose(fp);
 	}
-	if (ret > 0 && strstr(supp_features,
-			      strncmp(feature, "-O ", 3) ? feature : feature+3))
-		return 0;
+
+	if (strstr(supp_features,
+		   strncmp(feature, "-O ", 3) ? feature : feature + 3))
+		return true;
 
 	if ((fd = mkstemp(imgname)) < 0)
-		return -1;
-	else
-		close(fd);
+		return false;
+
+	close(fd);
 
 	snprintf(cmd, sizeof(cmd), "%s -F %s %s 100 >/dev/null 2>&1",
 		 MKE2FS, feature, imgname);
@@ -499,10 +423,9 @@ static int is_e2fsprogs_feature_supp(const char *feature)
 	ret = system(cmd);
 	unlink(imgname);
 
-	return ret;
+	return ret == 0;
 }
 
-
 /**
  * append_unique: append @key or @key=@val pair to @buf only if @key does not
  *                exists
@@ -540,14 +463,14 @@ static void append_unique(char *buf, char *prefix, char *key, char *val,
 }
 
 static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
-					size_t maxbuflen, int user_spec)
+					size_t maxbuflen, bool user_spec)
 {
-	int enable_64bit = 0;
+	unsigned long long blocks = mop->mo_device_kb / mop->mo_blocksize_kb;
+	bool enable_64bit = false;
 
 	/* Enable large block addresses if the LUN is over 2^32 blocks. */
-	if ((mop->mo_device_kb / (L_BLOCK_SIZE >> 10) > UINT32_MAX) &&
-	     is_e2fsprogs_feature_supp("-O 64bit") == 0)
-		enable_64bit = 1;
+	if (blocks > 0xffffffffULL && is_e2fsprogs_feature_supp("-O 64bit"))
+		enable_64bit = true;
 
 	if (IS_OST(&mop->mo_ldd)) {
 		append_unique(anchor, user_spec ? "," : " -O ",
@@ -568,14 +491,14 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
 
 	/* Multiple mount protection enabled only if failover node specified */
 	if (mop->mo_flags & MO_FAILOVER) {
-		if (is_e2fsprogs_feature_supp("-O mmp") == 0)
+		if (is_e2fsprogs_feature_supp("-O mmp"))
 			append_unique(anchor, ",", "mmp", NULL, maxbuflen);
 		else
 			disp_old_e2fsprogs_msg("mmp", 1);
 	}
 
 	/* Allow more than 65000 subdirectories */
-	if (is_e2fsprogs_feature_supp("-O dir_nlink") == 0)
+	if (is_e2fsprogs_feature_supp("-O dir_nlink"))
 		append_unique(anchor, ",", "dir_nlink", NULL, maxbuflen);
 
 	/* The following options are only valid for ext4-based ldiskfs.
@@ -584,8 +507,11 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
 		return 0;
 
 	/* Enable quota by default */
-	if (is_e2fsprogs_feature_supp("-O quota") == 0) {
+	if (is_e2fsprogs_feature_supp("-O quota")) {
 		append_unique(anchor, ",", "quota", NULL, maxbuflen);
+		/* Enable project quota by default */
+		if (is_e2fsprogs_feature_supp("-O project"))
+			append_unique(anchor, ",", "project", NULL, maxbuflen);
 	} else {
 		fatal();
 		fprintf(stderr, "\"-O quota\" must be supported by "
@@ -594,16 +520,30 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
 	}
 
 	/* Allow files larger than 2TB.  Also needs LU-16, but not harmful. */
-	if (is_e2fsprogs_feature_supp("-O huge_file") == 0)
+	if (is_e2fsprogs_feature_supp("-O huge_file"))
 		append_unique(anchor, ",", "huge_file", NULL, maxbuflen);
 
 	if (enable_64bit)
 		append_unique(anchor, ",", "64bit", NULL, maxbuflen);
 
+	if (blocks >= 0x1000000000 && is_e2fsprogs_feature_supp("-O meta_bg"))
+		append_unique(anchor, ",", "meta_bg", NULL, maxbuflen);
+
+	if (enable_64bit || strstr(mop->mo_mkfsopts, "meta_bg"))
+		append_unique(anchor, ",", "^resize_inode", NULL, maxbuflen);
+
+	/* Allow xattrs larger than one block, stored in a separate inode */
+	if (IS_MDT(&mop->mo_ldd) && is_e2fsprogs_feature_supp("-O ea_inode"))
+		append_unique(anchor, ",", "ea_inode", NULL, maxbuflen);
+
+	/* Allow more than 10M directory entries */
+	if (IS_MDT(&mop->mo_ldd) && is_e2fsprogs_feature_supp("-O large_dir"))
+		append_unique(anchor, ",", "large_dir", NULL, maxbuflen);
+
 	/* Cluster inode/block bitmaps and inode table for more efficient IO.
 	 * Align the flex groups on a 1MB boundary for better performance. */
 	/* This -O feature needs to go last, since it adds the "-G" option. */
-	if (is_e2fsprogs_feature_supp("-O flex_bg") == 0) {
+	if (is_e2fsprogs_feature_supp("-O flex_bg")) {
 		char tmp_buf[64];
 
 		append_unique(anchor, ",", "flex_bg", NULL, maxbuflen);
@@ -611,7 +551,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
 		if (IS_OST(&mop->mo_ldd) &&
 		    strstr(mop->mo_mkfsopts, "-G") == NULL) {
 			snprintf(tmp_buf, sizeof(tmp_buf), " -G %u",
-				 (1 << 20) / L_BLOCK_SIZE);
+				 1024 / mop->mo_blocksize_kb);
 			strscat(anchor, tmp_buf, maxbuflen);
 		}
 	}
@@ -666,263 +606,305 @@ static char *moveopts_to_end(char *start)
 /* Build fs according to type */
 int ldiskfs_make_lustre(struct mkfs_opts *mop)
 {
-	__u64 device_kb = mop->mo_device_kb, block_count = 0;
 	char mkfs_cmd[PATH_MAX];
 	char buf[64];
 	char *start;
 	char *dev;
 	int ret = 0, ext_opts = 0;
+	bool enable_64bit = false;
+	long inode_size = 0;
 	size_t maxbuflen;
 
+	mop->mo_blocksize_kb = 4;
+
+	start = strstr(mop->mo_mkfsopts, "-b");
+	if (start) {
+		char *end = NULL;
+		long blocksize;	/* requested block size, in bytes */
+
+		blocksize = strtol(start + 2, &end, 0);
+		if (end && (*end == 'k' || *end == 'K'))
+			blocksize *= 1024;
+		/* must lie within EXT4_MIN_BLOCK_SIZE..EXT4_MAX_BLOCK_SIZE */
+		if (blocksize < 1024 || blocksize > 65536) {
+			fprintf(stderr,
+				"%s: blocksize %ld not in 1024-65536 bytes, normally 4096 bytes\n",
+				progname, blocksize);
+			return EINVAL;
+		}
+
+		if ((blocksize & (blocksize - 1)) != 0) {
+			fprintf(stderr,
+				"%s: blocksize %ld not a power-of-two value\n",
+				progname, blocksize);
+			return EINVAL;
+		}
+		mop->mo_blocksize_kb = blocksize >> 10;	/* bytes -> KiB */
+	}
+
 	if (!(mop->mo_flags & MO_IS_LOOP)) {
-		mop->mo_device_kb = get_device_size(mop->mo_device);
+		__u64 device_kb = get_device_size(mop->mo_device);
 
-		if (mop->mo_device_kb == 0)
+		if (device_kb == 0)
 			return ENODEV;
 
 		/* Compare to real size */
-		if (device_kb == 0 || device_kb > mop->mo_device_kb)
-			device_kb = mop->mo_device_kb;
-		else
+		if (mop->mo_device_kb == 0 || device_kb < mop->mo_device_kb)
 			mop->mo_device_kb = device_kb;
 	}
 
 	if (mop->mo_device_kb != 0) {
+		__u64 block_count;
+
 		if (mop->mo_device_kb < 32384) {
 			fprintf(stderr, "%s: size of filesystem must be larger "
 				"than 32MB, but is set to %lldKB\n",
 				progname, (long long)mop->mo_device_kb);
 			return EINVAL;
 		}
-		block_count = mop->mo_device_kb / (L_BLOCK_SIZE >> 10);
-		/* If the LUN size is just over 2^32 blocks, limit the
-		 * filesystem size to 2^32-1 blocks to avoid problems with
-		 * ldiskfs/mkfs not handling this size.  Bug 22906 */
-		if (block_count > 0xffffffffULL && block_count < 0x100002000ULL)
-			block_count = 0xffffffffULL;
+		block_count = mop->mo_device_kb / mop->mo_blocksize_kb;
+		if (block_count > 0xffffffffULL) {
+			/* If the LUN size is just over 2^32 blocks, limit the
+			 * filesystem size to 2^32-1 blocks to avoid problems
+			 * with ldiskfs/mkfs not handling this well. b=22906
+			 */
+			if (block_count < 0x100002000ULL)
+				mop->mo_device_kb =
+					0xffffffffULL * mop->mo_blocksize_kb;
+			else
+				enable_64bit = true;
+		}
 	}
 
-	if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
-	    (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) ||
-	    (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS2)) {
-		long inode_size = 0;
-
-		/* Journal size in MB */
-		if (strstr(mop->mo_mkfsopts, "-J") == NULL &&
-		    device_kb > 1024 * 1024) {
-			/* Choose our own default journal size */
-			long journal_mb = 0, max_mb;
-
-			/* cap journal size at 4GB for MDT,
-			 * leave it at 400MB for OSTs. */
-			if (IS_MDT(&mop->mo_ldd))
-				max_mb = 4096;
-			else if (IS_OST(&mop->mo_ldd))
-				max_mb = 400;
-			else /* Use mke2fs default size for MGS */
-				max_mb = 0;
-
-			/* Use at most 4% of device for journal */
-			journal_mb = device_kb * 4 / (1024 * 100);
-			if (journal_mb > max_mb)
-				journal_mb = max_mb;
-
-			if (journal_mb) {
-				sprintf(buf, " -J size=%ld", journal_mb);
-				strscat(mop->mo_mkfsopts, buf,
-					sizeof(mop->mo_mkfsopts));
-			}
-		}
+	if ((mop->mo_ldd.ldd_mount_type != LDD_MT_EXT3) &&
+	    (mop->mo_ldd.ldd_mount_type != LDD_MT_LDISKFS) &&
+	    (mop->mo_ldd.ldd_mount_type != LDD_MT_LDISKFS2)) {
+		fprintf(stderr, "%s: unsupported fs type: %d (%s)\n",
+			progname, mop->mo_ldd.ldd_mount_type,
+			MT_STR(&mop->mo_ldd));
 
-		/*
-		 * The inode size is constituted by following elements
-		 * (assuming all files are in composite layout and has
-		 * 3 components):
-		 *
-		 *   ldiskfs inode size: 160
-		 *   MDT extended attributes size, including:
-		 *	ext4_xattr_header: 32
-		 *	LOV EA size: 32(lov_comp_md_v1) +
-		 *		     3 * 40(lov_comp_md_entry_v1) +
-		 *		     3 * 32(lov_mds_md) +
-		 *		     stripes * 24(lov_ost_data) +
-		 *		     16(xattr_entry) + 4("lov")
-		 *	LMA EA size: 24(lustre_mdt_attrs) +
-		 *		     16(xattr_entry) + 4("lma")
-		 *	SOM EA size: 24(lustre_som_attrs) +
-		 *		     16(xattr_entry) + 4("som")
-		 *	link EA size: 24(link_ea_header) + 18(link_ea_entry) +
-		 *		      16(filename) + 16(xattr_entry) + 4("link")
-		 *   and some margin for 4-byte alignment, ACLs and other EAs.
-		 *
-		 * If we say the average filename length is about 32 bytes,
-		 * the calculation looks like:
-		 * 160 + 32 + (32+3*(40+32)+24*stripes+20) + (24+20) + (24+20) +
-		 *  (24+20) + (~42+16+20) + other <= 512*2^m, {m=0,1,2,3}
-		 */
-		if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
-			if (IS_MDT(&mop->mo_ldd)) {
-				if (mop->mo_stripe_count > 59)
-					inode_size = 512; /* bz 7241 */
-				/* see also "-i" below for EA blocks */
-				else if (mop->mo_stripe_count > 16)
-					inode_size = 2048;
-				else
-					inode_size = 1024;
-			} else if (IS_OST(&mop->mo_ldd)) {
-				/* We store MDS FID and necessary composite
-				 * layout information in the OST object EA:
-				 *   ldiskfs inode size: 160
-				 *   OST extended attributes size, including:
-				 *	ext4_xattr_header: 32
-				 *	LMA EA size: 24(lustre_mdt_attrs) +
-				 *		     16(xattr_entry) + 4("lma")
-				 *	FID EA size: 52(filter_fid) +
-				 *		     16(xattr_entry) + 4("fid")
-				 * 160 + 32 + (24+20) + (52+20) = 308
-				 */
-				inode_size = 512;
-			}
+		return EINVAL;
+	}
 
-			if (inode_size > 0) {
-				sprintf(buf, " -I %ld", inode_size);
-				strscat(mop->mo_mkfsopts, buf,
-					sizeof(mop->mo_mkfsopts));
-			}
+	/* Journal size in MB */
+	if (strstr(mop->mo_mkfsopts, "-J") == NULL &&
+	    mop->mo_device_kb > 1024 * 1024) {
+		/* Choose our own default journal size */
+		long journal_mb = 0, max_mb;
+
+		/* cap journal size at 4GB for MDT, leave at 1GB for OSTs */
+		if (IS_MDT(&mop->mo_ldd))
+			max_mb = 4096;
+		else if (IS_OST(&mop->mo_ldd))
+			max_mb = 1024;
+		else /* Use mke2fs default size for MGS */
+			max_mb = 0;
+
+		/* Use at most 4% of device for journal */
+		journal_mb = mop->mo_device_kb * 4 / (1024 * 100);
+		if (journal_mb > max_mb)
+			journal_mb = max_mb;
+
+		if (journal_mb) {
+			snprintf(buf, sizeof(buf), " -J size=%ld", journal_mb);
+			strscat(mop->mo_mkfsopts, buf,
+				sizeof(mop->mo_mkfsopts));
 		}
+	}
 
-		/* Bytes_per_inode: disk size / num inodes */
-		if (strstr(mop->mo_mkfsopts, "-i") == NULL &&
-		    strstr(mop->mo_mkfsopts, "-N") == NULL) {
-			long bytes_per_inode = 0;
-
-			/* Allocate more inodes on MDT devices.  There is
-			 * no data stored on the MDT, and very little extra
-			 * metadata beyond the inode.  It could go down as
-			 * low as 1024 bytes, but this is conservative.
-			 * Account for external EA blocks for wide striping. */
-			if (IS_MDT(&mop->mo_ldd)) {
-				bytes_per_inode = inode_size + 1536;
-
-				if (mop->mo_stripe_count > 59) {
-					int extra = mop->mo_stripe_count * 24;
-					extra = ((extra - 1) | 4095) + 1;
-					bytes_per_inode += extra;
-				}
-			}
-
-			/* Allocate fewer inodes on large OST devices.  Most
-			 * filesystems can be much more aggressive than even
-			 * this, but it is impossible to know in advance. */
-			if (IS_OST(&mop->mo_ldd)) {
-				/* OST > 16TB assume average file size 1MB */
-				if (device_kb > (16ULL << 30))
-					bytes_per_inode = 1024 * 1024;
-				/* OST > 4TB assume average file size 512kB */
-				else if (device_kb > (4ULL << 30))
-					bytes_per_inode = 512 * 1024;
-				/* OST > 1TB assume average file size 256kB */
-				else if (device_kb > (1ULL << 30))
-					bytes_per_inode = 256 * 1024;
-				/* OST > 10GB assume average file size 64kB,
-				 * plus a bit so that inodes will fit into a
-				 * 256x flex_bg without overflowing */
-				else if (device_kb > (10ULL << 20))
-					bytes_per_inode = 69905;
-			}
-
-			if (bytes_per_inode > 0) {
-				sprintf(buf, " -i %ld", bytes_per_inode);
-				strscat(mop->mo_mkfsopts, buf,
-					sizeof(mop->mo_mkfsopts));
-				mop->mo_inode_size = bytes_per_inode;
-			}
+	/*
+	 * The inode size is constituted by following elements
+	 * (assuming all files are in composite layout and has
+	 * 3 components):
+	 *
+	 *   ldiskfs inode size: 160
+	 *   MDT extended attributes size, including:
+	 *	ext4_xattr_header: 32
+	 *	LOV EA size: 32(lov_comp_md_v1) +
+	 *		     3 * 40(lov_comp_md_entry_v1) +
+	 *		     3 * 32(lov_mds_md) +
+	 *		     stripes * 24(lov_ost_data) +
+	 *		     16(xattr_entry) + 4("lov")
+	 *	LMA EA size: 24(lustre_mdt_attrs) +
+	 *		     16(xattr_entry) + 4("lma")
+	 *	SOM EA size: 24(lustre_som_attrs) +
+	 *		     16(xattr_entry) + 4("som")
+	 *	link EA size: 24(link_ea_header) + 18(link_ea_entry) +
+	 *		      16(filename) + 16(xattr_entry) + 4("link")
+	 *   and some margin for 4-byte alignment, ACLs and other EAs.
+	 *
+	 * If we say the average filename length is about 32 bytes,
+	 * the calculation looks like:
+	 * 160 + 32 + (32+3*(40+32)+24*stripes+20) + (24+20) + (24+20) +
+	 *  (24+20) + (~42+16+20) + other <= 512*2^m, {m=0,1,2,3}
+	 */
+	if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
+		if (IS_MDT(&mop->mo_ldd)) {
+			if (mop->mo_stripe_count > 59)
+				inode_size = 512; /* bz 7241 */
+			/* see also "-i" below for EA blocks */
+			else if (mop->mo_stripe_count > 16)
+				inode_size = 2048;
+			else
+				inode_size = 1024;
+		} else if (IS_OST(&mop->mo_ldd)) {
+			/* We store MDS FID and necessary composite
+			 * layout information in the OST object EA:
+			 *   ldiskfs inode size: 160
+			 *   OST extended attributes size, including:
+			 *	ext4_xattr_header: 32
+			 *	LMA EA size: 24(lustre_mdt_attrs) +
+			 *		     16(xattr_entry) + 4("lma")
+			 *	FID EA size: 52(filter_fid) +
+			 *		     16(xattr_entry) + 4("fid")
+			 * 160 + 32 + (24+20) + (52+20) = 308
+			 */
+			inode_size = 512;
 		}
 
-		if (verbose < 2) {
-			strscat(mop->mo_mkfsopts, " -q",
+		if (inode_size > 0) {
+			snprintf(buf, sizeof(buf), " -I %ld", inode_size);
+			strscat(mop->mo_mkfsopts, buf,
 				sizeof(mop->mo_mkfsopts));
 		}
+	}
+
+	/* Bytes_per_inode: disk size / num inodes */
+	if (strstr(mop->mo_mkfsopts, "-i") == NULL &&
+	    strstr(mop->mo_mkfsopts, "-N") == NULL) {
+		long bytes_per_inode = 0;
+
+		/* Allocate more inodes on MDT devices.  There is
+		 * no data stored on the MDT, and very little extra
+		 * metadata beyond the inode.  It could go down as
+		 * low as 1024 bytes, but this is conservative.
+		 * Account for external EA blocks for wide striping.
+		 */
+		if (IS_MDT(&mop->mo_ldd)) {
+			bytes_per_inode = inode_size + 1536;
 
-		/* start handle -O mkfs options */
-		if ((start = strstr(mop->mo_mkfsopts, "-O")) != NULL) {
-			if (strstr(start + 2, "-O") != NULL) {
-				fprintf(stderr,
-					"%s: don't specify multiple -O options\n",
-					progname);
-				return EINVAL;
+			if (mop->mo_stripe_count > 59) {
+				int extra = mop->mo_stripe_count * 24;
+
+				extra = ((extra - 1) | 4095) + 1;
+				bytes_per_inode += extra;
 			}
-			start = moveopts_to_end(start);
-			maxbuflen = sizeof(mop->mo_mkfsopts) -
-				(start - mop->mo_mkfsopts) - strlen(start);
-			ret = enable_default_ext4_features(mop, start, maxbuflen, 1);
-		} else {
-			start = mop->mo_mkfsopts + strlen(mop->mo_mkfsopts),
-			      maxbuflen = sizeof(mop->mo_mkfsopts) -
-				      strlen(mop->mo_mkfsopts);
-			ret = enable_default_ext4_features(mop, start, maxbuflen, 0);
 		}
-		if (ret)
-			return ret;
-		/* end handle -O mkfs options */
-
-		/* start handle -E mkfs options */
-		if ((start = strstr(mop->mo_mkfsopts, "-E")) != NULL) {
-			if (strstr(start + 2, "-E") != NULL) {
-				fprintf(stderr,
-					"%s: don't specify multiple -E options\n",
-					progname);
-				return EINVAL;
-			}
-			start = moveopts_to_end(start);
-			maxbuflen = sizeof(mop->mo_mkfsopts) -
-				(start - mop->mo_mkfsopts) - strlen(start);
-			ext_opts = 1;
-		} else {
-			start = mop->mo_mkfsopts + strlen(mop->mo_mkfsopts);
-			maxbuflen = sizeof(mop->mo_mkfsopts) -
-				strlen(mop->mo_mkfsopts);
+
+		/* Allocate fewer inodes on large OST devices.  Most
+		 * filesystems can be much more aggressive than even
+		 * this, but it is impossible to know in advance.
+		 */
+		if (IS_OST(&mop->mo_ldd)) {
+			/* OST > 16TB assume average file size 1MB */
+			if (mop->mo_device_kb > (16ULL << 30))
+				bytes_per_inode = 1024 * 1024;
+			/* OST > 4TB assume average file size 512kB */
+			else if (mop->mo_device_kb > (4ULL << 30))
+				bytes_per_inode = 512 * 1024;
+			/* OST > 1TB assume average file size 256kB */
+			else if (mop->mo_device_kb > (1ULL << 30))
+				bytes_per_inode = 256 * 1024;
+			/* OST > 10GB assume average file size 64kB,
+			 * plus a bit so that inodes will fit into a
+			 * 256x flex_bg without overflowing.
+			 */
+			else if (mop->mo_device_kb > (10ULL << 20))
+				bytes_per_inode = 69905;
 		}
 
-		/* In order to align the filesystem metadata on 1MB boundaries,
-		 * give a resize value that will reserve a power-of-two group
-		 * descriptor blocks, but leave one block for the superblock.
-		 * Only useful for filesystems with < 2^32 blocks due to resize
-		 * limitations. */
-		if (strstr(mop->mo_mkfsopts, "meta_bg") == NULL &&
-		    IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 &&
-		    mop->mo_device_kb * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) {
-			unsigned group_blocks = L_BLOCK_SIZE * 8;
-			unsigned desc_per_block = L_BLOCK_SIZE / 32;
-			unsigned resize_blks;
-
-			resize_blks = (1ULL<<32) - desc_per_block*group_blocks;
-			snprintf(buf, sizeof(buf), "%u", resize_blks);
-			append_unique(start, ext_opts ? "," : " -E ",
-				      "resize", buf, maxbuflen);
-			ext_opts = 1;
+		if (bytes_per_inode > 0) {
+			snprintf(buf, sizeof(buf), " -i %ld", bytes_per_inode);
+			strscat(mop->mo_mkfsopts, buf,
+				sizeof(mop->mo_mkfsopts));
+			mop->mo_inode_size = bytes_per_inode;
 		}
+	}
 
-		/* Avoid zeroing out the full journal - speeds up mkfs */
-		if (is_e2fsprogs_feature_supp("-E lazy_journal_init") == 0)
-			append_unique(start, ext_opts ? "," : " -E ",
-				      "lazy_journal_init", NULL, maxbuflen);
-		/* end handle -E mkfs options */
+	if (verbose < 2)
+		strscat(mop->mo_mkfsopts, " -q", sizeof(mop->mo_mkfsopts));
 
-		/* Allow reformat of full devices (as opposed to
-		   partitions.)  We already checked for mounted dev. */
-		strscat(mop->mo_mkfsopts, " -F", sizeof(mop->mo_mkfsopts));
+	/* start handle -O mkfs options */
+	start = strstr(mop->mo_mkfsopts, "-O");
+	if (start) {
+		if (strstr(start + 2, "-O") != NULL) {
+			fprintf(stderr,
+				"%s: don't specify multiple -O options\n",
+				progname);
+			return EINVAL;
+		}
+		start = moveopts_to_end(start);
+		maxbuflen = sizeof(mop->mo_mkfsopts) -
+			(start - mop->mo_mkfsopts) - strlen(start);
+		ret = enable_default_ext4_features(mop, start, maxbuflen, 1);
+	} else {
+		start = mop->mo_mkfsopts + strlen(mop->mo_mkfsopts);
+		maxbuflen = sizeof(mop->mo_mkfsopts) - strlen(mop->mo_mkfsopts);
+		ret = enable_default_ext4_features(mop, start, maxbuflen, 0);
+	}
+	if (ret)
+		return ret;
+	/* end handle -O mkfs options */
 
-		snprintf(mkfs_cmd, sizeof(mkfs_cmd),
-			 "%s -j -b %d -L %s ", MKE2FS, L_BLOCK_SIZE,
-			 mop->mo_ldd.ldd_svname);
+	/* start handle -E mkfs options */
+	start = strstr(mop->mo_mkfsopts, "-E");
+	if (start) {
+		if (strstr(start + 2, "-E") != NULL) {
+			fprintf(stderr,
+				"%s: don't specify multiple -E options\n",
+				progname);
+			return EINVAL;
+		}
+		start = moveopts_to_end(start);
+		maxbuflen = sizeof(mop->mo_mkfsopts) -
+			(start - mop->mo_mkfsopts) - strlen(start);
+		ext_opts = 1;
 	} else {
-		fprintf(stderr,"%s: unsupported fs type: %d (%s)\n",
-			progname, mop->mo_ldd.ldd_mount_type,
-			MT_STR(&mop->mo_ldd));
-		return EINVAL;
+		start = mop->mo_mkfsopts + strlen(mop->mo_mkfsopts);
+		maxbuflen = sizeof(mop->mo_mkfsopts) - strlen(mop->mo_mkfsopts);
+	}
+
+	/* In order to align the filesystem metadata on 1MB boundaries,
+	 * give a resize value that will reserve a power-of-two group
+	 * descriptor blocks, but leave one block for the superblock.
+	 * Only useful for filesystems with < 2^32 blocks due to resize
+	 * limitations.
+	 */
+	if (!enable_64bit && strstr(mop->mo_mkfsopts, "meta_bg") == NULL &&
+	    IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024) {
+		unsigned int group_blocks = mop->mo_blocksize_kb * 8192;
+		unsigned int desc_per_block = mop->mo_blocksize_kb * 1024 / 32;
+		unsigned int resize_blks;
+
+		resize_blks = (1ULL<<32) - desc_per_block*group_blocks;
+		snprintf(buf, sizeof(buf), "%u", resize_blks);
+		append_unique(start, ext_opts ? "," : " -E ",
+			      "resize", buf, maxbuflen);
+		ext_opts = 1;
 	}
 
+	/* Explicitly turn off lazy journal and inode table initialization */
+	if (is_e2fsprogs_feature_supp("-E lazy_journal_init=0")) {
+		append_unique(start, ext_opts ? "," : " -E ",
+			      "lazy_journal_init", "0", maxbuflen);
+		ext_opts = 1;
+	}
+	if (is_e2fsprogs_feature_supp("-E lazy_itable_init=0")) {
+		append_unique(start, ext_opts ? "," : " -E ",
+			      "lazy_itable_init", "0", maxbuflen);
+		ext_opts = 1;
+	}
+
+	/* end handle -E mkfs options */
+
+	/* Allow reformat of full devices (as opposed to partitions).
+	 * We already checked for mounted dev.
+	 */
+	strscat(mop->mo_mkfsopts, " -F", sizeof(mop->mo_mkfsopts));
+
+	snprintf(mkfs_cmd, sizeof(mkfs_cmd), "%s -j -b %d -L %s ", MKE2FS,
+		 mop->mo_blocksize_kb * 1024, mop->mo_ldd.ldd_svname);
+
 	/* For loop device format the dev, not the filename */
 	dev = mop->mo_device;
 	if (mop->mo_flags & MO_IS_LOOP)
@@ -931,16 +913,16 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop)
 	vprint("formatting backing filesystem %s on %s\n",
 	       MT_STR(&mop->mo_ldd), dev);
 	vprint("\ttarget name   %s\n", mop->mo_ldd.ldd_svname);
-	vprint("\t4k blocks     %ju\n", (uintmax_t)block_count);
+	vprint("\tkilobytes     %llu\n", (unsigned long long)mop->mo_device_kb);
 	vprint("\toptions       %s\n", mop->mo_mkfsopts);
 
 	/* mkfs_cmd's trailing space is important! */
 	strscat(mkfs_cmd, mop->mo_mkfsopts, sizeof(mkfs_cmd));
 	strscat(mkfs_cmd, " ", sizeof(mkfs_cmd));
 	strscat(mkfs_cmd, dev, sizeof(mkfs_cmd));
-	if (block_count != 0) {
-		snprintf(buf, sizeof(buf), " %ju",
-			 (uintmax_t)block_count);
+	if (mop->mo_device_kb != 0) {
+		snprintf(buf, sizeof(buf), " %lluk",
+			 (unsigned long long)mop->mo_device_kb);
 		strscat(mkfs_cmd, buf, sizeof(mkfs_cmd));
 	}
 
@@ -1173,7 +1155,9 @@ static int tune_block_dev_scheduler(const char *sys_path, const char *new_sched)
 	*e = '\0';
 
 	if (strcmp(old_sched, "noop") == 0 ||
-	    strcmp(old_sched, new_sched) == 0)
+	    strcmp(old_sched, "deadline") == 0 ||
+	    strcmp(old_sched, "mq-deadline") == 0 ||
+	    strstr(old_sched, new_sched) == 0)
 		return 0;
 
 	rc = write_file(path, new_sched);
@@ -1210,13 +1194,13 @@ static int tune_block_dev_slaves(const char *sys_path, struct mount_opts *mop)
 	}
 
 	while ((d = readdir(slaves_dir)) != NULL) {
-		char path[PATH_MAX];
+		char path[PATH_MAX * 2];
 		int rc2;
 
 		if (d->d_type != DT_LNK)
 			continue;
 
-		snprintf(path, sizeof(path), "%s/%s", slaves_path, d->d_name);
+		snprintf(path, sizeof(path), "/dev/%s", d->d_name);
 		rc2 = tune_block_dev(path, mop);
 		if (rc2 != 0)
 			rc = rc2;
@@ -1234,7 +1218,7 @@ static int tune_block_dev(const char *src, struct mount_opts *mop)
 {
 	struct stat st;
 	char sys_path[PATH_MAX];
-	char partition_path[PATH_MAX];
+	char partition_path[PATH_MAX + sizeof("partition")];
 	char *real_sys_path = NULL;
 	int rc;
 
@@ -1365,14 +1349,6 @@ int ldiskfs_rename_fsname(struct mkfs_opts *mop, const char *oldname)
 		return ret;
 	}
 
-#ifdef HAVE_SELINUX
-	/*
-	 * Append file context to mount options if SE Linux is enabled
-	 */
-	if (is_selinux_enabled() > 0)
-		append_context_for_mount(mntpt, mop);
-#endif
-
 	if (mop->mo_flags & MO_IS_LOOP)
 		dev = mop->mo_loopdev;
 	else
@@ -1404,7 +1380,7 @@ int ldiskfs_enable_quota(struct mkfs_opts *mop)
 	char cmd[512];
 	int cmdsz = sizeof(cmd), ret;
 
-	if (is_e2fsprogs_feature_supp("-O quota") != 0) {
+	if (!is_e2fsprogs_feature_supp("-O quota")) {
 		fprintf(stderr, "%s: \"-O quota\" is is not supported by "
 			"current e2fsprogs\n", progname);
 		return EINVAL;
@@ -1415,7 +1391,9 @@ int ldiskfs_enable_quota(struct mkfs_opts *mop)
 		dev = mop->mo_loopdev;
 
 	/* Quota feature is already enabled? */
-	if (is_feature_enabled("quota", dev)) {
+	if (!backfs)
+		ext2fs_open(dev, open_flags, 0, 0, unix_io_manager, &backfs);
+	if (backfs && ext2fs_has_feature_quota(backfs->super)) {
 		vprint("Quota feature is already enabled.\n");
 		return 0;
 	}
@@ -1439,7 +1417,10 @@ int ldiskfs_init(void)
 
 void ldiskfs_fini(void)
 {
-	return;
+	if (!backfs)
+		return;	/* no filesystem handle was opened */
+	ext2fs_close(backfs);
+	backfs = NULL;	/* users test !backfs before reopening */
 }
 
 #ifndef PLUGIN_DIR