Whamcloud - gitweb
LU-11736 utils: don't set max_sectors_kb on MDT/MGT
[fs/lustre-release.git] / lustre / utils / libmount_utils_ldiskfs.c
index b82b764..c1f79ae 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, 2016, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -57,6 +57,7 @@
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/utsname.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/utsname.h>
+#include <sys/sysmacros.h>
 
 #include <string.h>
 #include <getopt.h>
 
 #include <string.h>
 #include <getopt.h>
@@ -191,7 +192,7 @@ static int is_feature_enabled(const char *feature, const char *devpath)
 int ldiskfs_write_ldd(struct mkfs_opts *mop)
 {
        char mntpt[] = "/tmp/mntXXXXXX";
 int ldiskfs_write_ldd(struct mkfs_opts *mop)
 {
        char mntpt[] = "/tmp/mntXXXXXX";
-       char filepnm[128];
+       char filepnm[192];
        char *dev;
        FILE *filep;
        int ret = 0;
        char *dev;
        FILE *filep;
        int ret = 0;
@@ -387,7 +388,7 @@ static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs)
                E2FSPROGS, feature);
 #if !(HAVE_LDISKFSPROGS)
        fprintf(stderr, "Please install the latest version of e2fsprogs from\n"
                E2FSPROGS, feature);
 #if !(HAVE_LDISKFSPROGS)
        fprintf(stderr, "Please install the latest version of e2fsprogs from\n"
-               "https://downloads.hpdd.intel.com/public/e2fsprogs/latest/\n"
+               "https://downloads.whamcloud.com/public/e2fsprogs/latest/\n"
                "to enable this feature.\n");
 #endif
        if (make_backfs)
                "to enable this feature.\n");
 #endif
        if (make_backfs)
@@ -541,6 +542,13 @@ static void append_unique(char *buf, char *prefix, char *key, char *val,
 static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
                                        size_t maxbuflen, int user_spec)
 {
 static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
                                        size_t maxbuflen, int user_spec)
 {
+       int enable_64bit = 0;
+
+       /* Enable large block addresses if the LUN is over 2^32 blocks. */
+       if ((mop->mo_device_kb / (L_BLOCK_SIZE >> 10) > UINT32_MAX) &&
+            is_e2fsprogs_feature_supp("-O 64bit") == 0)
+               enable_64bit = 1;
+
        if (IS_OST(&mop->mo_ldd)) {
                append_unique(anchor, user_spec ? "," : " -O ",
                              "extents", NULL, maxbuflen);
        if (IS_OST(&mop->mo_ldd)) {
                append_unique(anchor, user_spec ? "," : " -O ",
                              "extents", NULL, maxbuflen);
@@ -549,7 +557,10 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
                append_unique(anchor, user_spec ? "," : " -O ",
                              "dirdata", NULL, maxbuflen);
                append_unique(anchor, ",", "uninit_bg", NULL, maxbuflen);
                append_unique(anchor, user_spec ? "," : " -O ",
                              "dirdata", NULL, maxbuflen);
                append_unique(anchor, ",", "uninit_bg", NULL, maxbuflen);
-               append_unique(anchor, ",", "^extents", NULL, maxbuflen);
+               if (enable_64bit)
+                       append_unique(anchor, ",", "extents", NULL, maxbuflen);
+               else
+                       append_unique(anchor, ",", "^extents", NULL, maxbuflen);
        } else {
                append_unique(anchor, user_spec ? "," : " -O ",
                              "uninit_bg", NULL, maxbuflen);
        } else {
                append_unique(anchor, user_spec ? "," : " -O ",
                              "uninit_bg", NULL, maxbuflen);
@@ -586,9 +597,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor,
        if (is_e2fsprogs_feature_supp("-O huge_file") == 0)
                append_unique(anchor, ",", "huge_file", NULL, maxbuflen);
 
        if (is_e2fsprogs_feature_supp("-O huge_file") == 0)
                append_unique(anchor, ",", "huge_file", NULL, maxbuflen);
 
-       /* Enable large block addresses if the LUN is over 2^32 blocks. */
-       if (mop->mo_device_kb / (L_BLOCK_SIZE >> 10) >= 0x100002000ULL &&
-           is_e2fsprogs_feature_supp("-O 64bit") == 0)
+       if (enable_64bit)
                append_unique(anchor, ",", "64bit", NULL, maxbuflen);
 
        /* Cluster inode/block bitmaps and inode table for more efficient IO.
                append_unique(anchor, ",", "64bit", NULL, maxbuflen);
 
        /* Cluster inode/block bitmaps and inode table for more efficient IO.
@@ -730,24 +739,26 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop)
                 * (assuming all files are in composite layout and have
                 * 3 components):
                 *
                 * (assuming all files are in composite layout and have
                 * 3 components):
                 *
-                *   ldiskfs inode size: 156
-                *   extended attributes size, including:
+                *   ldiskfs inode size: 160
+                *   MDT extended attributes size, including:
                 *      ext4_xattr_header: 32
                 *      LOV EA size: 32(lov_comp_md_v1) +
                 *                   3 * 40(lov_comp_md_entry_v1) +
                 *                   3 * 32(lov_mds_md) +
                 *                   stripes * 24(lov_ost_data) +
                 *      ext4_xattr_header: 32
                 *      LOV EA size: 32(lov_comp_md_v1) +
                 *                   3 * 40(lov_comp_md_entry_v1) +
                 *                   3 * 32(lov_mds_md) +
                 *                   stripes * 24(lov_ost_data) +
-                *                   16(xattr_entry) + 3(lov)
+                *                   16(xattr_entry) + 4("lov")
                 *      LMA EA size: 24(lustre_mdt_attrs) +
                 *      LMA EA size: 24(lustre_mdt_attrs) +
-                *                   16(xattr_entry) + 3(lma)
+                *                   16(xattr_entry) + 4("lma")
+                *      SOM EA size: 24(lustre_som_attrs) +
+                *                   16(xattr_entry) + 4("som")
                 *      link EA size: 24(link_ea_header) + 18(link_ea_entry) +
                 *      link EA size: 24(link_ea_header) + 18(link_ea_entry) +
-                *                    (filename) + 16(xattr_entry) + 4(link)
+                *                    16(filename) + 16(xattr_entry) + 4("link")
                 *   and some margin for 4-byte alignment, ACLs and other EAs.
                 *
                 * If we say the average filename length is about 32 bytes,
                 * the calculation looks like:
                 *   and some margin for 4-byte alignment, ACLs and other EAs.
                 *
                 * If we say the average filename length is about 32 bytes,
                 * the calculation looks like:
-                * 156 + 32 + (32+3*(40 + 32)+24*N+19) + (24+19) +
-                * (24+18+~32+20) + other <= 512*2^m, {m=0,1,2,3}
+                * 160 + 32 + (32+3*(40+32)+24*stripes+20) + (24+20) + (24+20) +
+                *  (24+20) + (~42+16+20) + other <= 512*2^m, {m=0,1,2,3}
                 */
                if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
                        if (IS_MDT(&mop->mo_ldd)) {
                 */
                if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
                        if (IS_MDT(&mop->mo_ldd)) {
@@ -760,7 +771,16 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop)
                                        inode_size = 1024;
                        } else if (IS_OST(&mop->mo_ldd)) {
                                /* We store MDS FID and necessary composite
                                        inode_size = 1024;
                        } else if (IS_OST(&mop->mo_ldd)) {
                                /* We store MDS FID and necessary composite
-                                * layout information in the OST object EA. */
+                                * layout information in the OST object EA:
+                                *   ldiskfs inode size: 160
+                                *   OST extended attributes size, including:
+                                *      ext4_xattr_header: 32
+                                *      LMA EA size: 24(lustre_mdt_attrs) +
+                                *                   16(xattr_entry) + 4("lma")
+                                *      FID EA size: 52(filter_fid) +
+                                *                   16(xattr_entry) + 4("fid")
+                                * 160 + 32 + (24+20) + (52+20) = 308
+                                */
                                inode_size = 512;
                        }
 
                                inode_size = 512;
                        }
 
@@ -815,6 +835,7 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop)
                                sprintf(buf, " -i %ld", bytes_per_inode);
                                strscat(mop->mo_mkfsopts, buf,
                                        sizeof(mop->mo_mkfsopts));
                                sprintf(buf, " -i %ld", bytes_per_inode);
                                strscat(mop->mo_mkfsopts, buf,
                                        sizeof(mop->mo_mkfsopts));
+                               mop->mo_inode_size = bytes_per_inode;
                        }
                }
 
                        }
                }
 
@@ -1217,6 +1238,13 @@ static int tune_block_dev(const char *src, struct mount_opts *mop)
        char *real_sys_path = NULL;
        int rc;
 
        char *real_sys_path = NULL;
        int rc;
 
+       /*
+        * Don't apply block device tuning for MDT or MGT devices,
+        * since they don't need huge IO sizes to get good performance.
+        */
+       if (!IS_OST(&mop->mo_ldd))
+               return 0;
+
        if (src == NULL)
                return EINVAL;
 
        if (src == NULL)
                return EINVAL;
 
@@ -1246,8 +1274,7 @@ static int tune_block_dev(const char *src, struct mount_opts *mop)
                        goto have_whole_dev;
 
                if (verbose)
                        goto have_whole_dev;
 
                if (verbose)
-                       fprintf(stderr,
-                               "warning: cannot access '%s': %s\n",
+                       fprintf(stderr, "warning: cannot access '%s': %s\n",
                                partition_path, strerror(errno));
                rc = errno;
                goto out;
                                partition_path, strerror(errno));
                rc = errno;
                goto out;