X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmount_utils_ldiskfs.c;h=4092eca9752dff1c60854bad5c672f95b0f4e286;hp=728c349cfe537991c88d4b56a5686bc541209c34;hb=b6dfd13e1d0ca4d280777cd0a30a78a63d49eda9;hpb=723613a734af240e03d9e8b42afeba442a77142d diff --git a/lustre/utils/mount_utils_ldiskfs.c b/lustre/utils/mount_utils_ldiskfs.c index 728c349..4092eca 100644 --- a/lustre/utils/mount_utils_ldiskfs.c +++ b/lustre/utils/mount_utils_ldiskfs.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2013, Intel Corporation. + * Copyright (c) 2012, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -47,12 +43,13 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif -#include "mount_utils.h" -#include +#include #include +#include +#include +#include #include #include -#include #include #include @@ -66,16 +63,11 @@ #include #include -#ifdef __linux__ -/* libcfs.h is not really needed here, but on SLES10/PPC, fs.h includes idr.h - * which requires BITS_PER_LONG to be defined */ -#include #ifndef BLKGETSIZE64 #include /* for BLKGETSIZE64 */ #endif +#include #include -#endif -#include #include #include #include @@ -84,6 +76,8 @@ #include #endif +#include "mount_utils.h" + #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" #define SCHEDULER_PATH "queue/scheduler" @@ -98,11 +92,14 @@ extern char *progname; #define DUMMY_FILE_NAME_LEN 25 #define EXT3_DIRENT_SIZE DUMMY_FILE_NAME_LEN +static void append_unique(char *buf, char *prefix, char *key, char *val, + size_t maxbuflen); + /* - * Concatenate context of the temporary mount point iff selinux is enabled + * Concatenate context of the temporary mount point if selinux is enabled */ #ifdef HAVE_SELINUX -void append_context_for_mount(char *mntpt, struct mkfs_opts *mop) +static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop) { security_context_t fcontext; @@ -114,13 +111,130 @@ void append_context_for_mount(char *mntpt, struct mkfs_opts *mop) } if (fcontext != NULL) { - strcat(mop->mo_ldd.ldd_mount_opts, ",context="); - strcat(mop->mo_ldd.ldd_mount_opts, fcontext); + append_unique(mop->mo_ldd.ldd_mount_opts, + ",", "context", fcontext, + sizeof(mop->mo_ldd.ldd_mount_opts)); freecon(fcontext); } } #endif +/* return canonicalized absolute pathname, even if the target file does not + * exist, unlike realpath */ +static char *absolute_path(char *devname) +{ + char buf[PATH_MAX + 1] = ""; + char *path; + char *ptr; + int len; + + path = malloc(sizeof(buf)); + if (path == NULL) + return NULL; + + if (devname[0] != '/') { + if (getcwd(buf, sizeof(buf) - 1) == NULL) { + free(path); + return NULL; + } + len = snprintf(path, sizeof(buf), "%s/%s", buf, devname); + if (len >= sizeof(buf)) { + free(path); + return NULL; + } + } else { + len = snprintf(path, sizeof(buf), "%s", devname); + if (len >= sizeof(buf)) { + free(path); + return NULL; + } + } + + /* truncate filename before calling realpath */ + ptr = strrchr(path, '/'); + if (ptr == NULL) { + free(path); + return NULL; + } + *ptr = '\0'; + if (buf != realpath(path, buf)) { + free(path); + return NULL; + } + /* add the filename back */ + len = snprintf(path, PATH_MAX, "%s/%s", buf, ptr+1); + if (len >= PATH_MAX) { + free(path); + return NULL; + } + return path; +} + +/* Determine if a device is a block device (as opposed to a file) */ +static int is_block(char *devname) +{ + struct stat st; + int ret = 0; + char *devpath; + + devpath = absolute_path(devname); + if (devpath == NULL) { + fprintf(stderr, "%s: failed to resolve path to %s\n", + progname, devname); + return -1; + } + + ret = access(devname, F_OK); + if (ret != 0) { + if (strncmp(devpath, "/dev/", 5) == 0) { + /* nobody sane wants to create a loopback file under + * /dev. Let's just report the device doesn't exist */ + fprintf(stderr, "%s: %s apparently does not exist\n", + progname, devpath); + ret = -1; + goto out; + } + ret = 0; + goto out; + } + ret = stat(devpath, &st); + if (ret != 0) { + fprintf(stderr, "%s: cannot stat %s\n", progname, devpath); + goto out; + } + ret = S_ISBLK(st.st_mode); +out: + free(devpath); + return ret; +} + +static int is_feature_enabled(const char *feature, const char *devpath) +{ + char cmd[PATH_MAX]; + FILE *fp; + char enabled_features[4096] = ""; + int ret = 1; + + snprintf(cmd, sizeof(cmd), "%s -c -R features %s 2>&1", + DEBUGFS, devpath); + + /* Using popen() instead of run_command() since debugfs does + * not return proper error code if command is not supported */ + fp = popen(cmd, "r"); + if (!fp) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return 0; + } + + ret = fread(enabled_features, 1, sizeof(enabled_features) - 1, fp); + enabled_features[ret] = '\0'; + pclose(fp); + + if (strstr(enabled_features, feature)) + return 1; + return 0; +} + /* Write the server config files */ int ldiskfs_write_ldd(struct mkfs_opts *mop) { @@ -151,7 +265,8 @@ int ldiskfs_write_ldd(struct mkfs_opts *mop) dev = mop->mo_loopdev; ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, - mop->mo_ldd.ldd_mount_opts); + (mop->mo_mountopts == NULL) ? + "errors=remount-ro" : mop->mo_mountopts); if (ret) { fprintf(stderr, "%s: Unable to mount %s: %s\n", progname, dev, strerror(errno)); @@ -191,6 +306,7 @@ int ldiskfs_write_ldd(struct mkfs_opts *mop) fclose(filep); goto out_umnt; } + fsync(filep->_fileno); fclose(filep); out_umnt: @@ -273,10 +389,19 @@ int ldiskfs_read_ldd(char *dev, struct lustre_disk_data *mo_ldd) return ret; } +int ldiskfs_erase_ldd(struct mkfs_opts *mop, char *param) +{ + return 0; +} + +void ldiskfs_print_ldd_params(struct mkfs_opts *mop) +{ + printf("Parameters:%s\n", mop->mo_ldd.ldd_params); +} /* Display the need for the latest e2fsprogs to be installed. make_backfs * indicates if the caller is make_lustre_backfs() or not. */ -void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) +static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) { static int msg_displayed; @@ -293,7 +418,7 @@ void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) E2FSPROGS, feature); #if !(HAVE_LDISKFSPROGS) fprintf(stderr, "Please install the latest version of e2fsprogs from\n" - "http://downloads.whamcloud.com/public/e2fsprogs/latest/\n" + "https://downloads.hpdd.intel.com/public/e2fsprogs/latest/\n" "to enable this feature.\n"); #endif if (make_backfs) @@ -386,7 +511,7 @@ static int is_e2fsprogs_feature_supp(const char *feature) } ret = fread(supp_features, 1, sizeof(supp_features) - 1, fp); supp_features[ret] = '\0'; - fclose(fp); + pclose(fp); } if (ret > 0 && strstr(supp_features, strncmp(feature, "-O ", 3) ? feature : feature+3)) @@ -437,8 +562,9 @@ static void append_unique(char *buf, char *prefix, char *key, char *val, strscat(buf, key, maxbuflen); if (val != NULL) { - strscat(buf, "=", maxbuflen); + strscat(buf, "=\"", maxbuflen); strscat(buf, val, maxbuflen); + strscat(buf, "\"", maxbuflen); } } } @@ -448,7 +574,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, { if (IS_OST(&mop->mo_ldd)) { append_unique(anchor, user_spec ? "," : " -O ", - "extents", NULL, sizeof(mop->mo_mkfsopts)); + "extents", NULL, maxbuflen); append_unique(anchor, ",", "uninit_bg", NULL, maxbuflen); } else if (IS_MDT(&mop->mo_ldd)) { append_unique(anchor, user_spec ? "," : " -O ", @@ -492,7 +618,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, append_unique(anchor, ",", "huge_file", NULL, maxbuflen); /* Enable large block addresses if the LUN is over 2^32 blocks. */ - if (mop->mo_device_sz / (L_BLOCK_SIZE >> 10) >= 0x100002000ULL && + if (mop->mo_device_kb / (L_BLOCK_SIZE >> 10) >= 0x100002000ULL && is_e2fsprogs_feature_supp("-O 64bit") == 0) append_unique(anchor, ",", "64bit", NULL, maxbuflen); @@ -504,7 +630,8 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, append_unique(anchor, ",", "flex_bg", NULL, maxbuflen); - if (IS_OST(&mop->mo_ldd)) { + if (IS_OST(&mop->mo_ldd) && + strstr(mop->mo_mkfsopts, "-G") == NULL) { snprintf(tmp_buf, sizeof(tmp_buf), " -G %u", (1 << 20) / L_BLOCK_SIZE); strscat(anchor, tmp_buf, maxbuflen); @@ -523,6 +650,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, */ static char *moveopts_to_end(char *start) { + size_t len; char save[512]; char *end, *idx; @@ -535,9 +663,13 @@ static char *moveopts_to_end(char *start) while (*end != ' ' && *end != '\0') ++end; + len = end - start; + if (len >= sizeof(save)) + len = sizeof(save) - 1; + /* save options */ - strncpy(save, start, end - start); - save[end - start] = '\0'; + strncpy(save, start, len); + save[len] = '\0'; /* move remaining options up front */ if (*end) @@ -556,7 +688,7 @@ static char *moveopts_to_end(char *start) /* Build fs according to type */ int ldiskfs_make_lustre(struct mkfs_opts *mop) { - __u64 device_sz = mop->mo_device_sz, block_count = 0; + __u64 device_kb = mop->mo_device_kb, block_count = 0; char mkfs_cmd[PATH_MAX]; char buf[64]; char *start; @@ -565,26 +697,26 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) size_t maxbuflen; if (!(mop->mo_flags & MO_IS_LOOP)) { - mop->mo_device_sz = get_device_size(mop->mo_device); + mop->mo_device_kb = get_device_size(mop->mo_device); - if (mop->mo_device_sz == 0) + if (mop->mo_device_kb == 0) return ENODEV; /* Compare to real size */ - if (device_sz == 0 || device_sz > mop->mo_device_sz) - device_sz = mop->mo_device_sz; + if (device_kb == 0 || device_kb > mop->mo_device_kb) + device_kb = mop->mo_device_kb; else - mop->mo_device_sz = device_sz; + mop->mo_device_kb = device_kb; } - if (mop->mo_device_sz != 0) { - if (mop->mo_device_sz < 8096){ + if (mop->mo_device_kb != 0) { + if (mop->mo_device_kb < 32384) { fprintf(stderr, "%s: size of filesystem must be larger " - "than 8MB, but is set to %lldKB\n", - progname, (long long)mop->mo_device_sz); + "than 32MB, but is set to %lldKB\n", + progname, (long long)mop->mo_device_kb); return EINVAL; } - block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10); + block_count = mop->mo_device_kb / (L_BLOCK_SIZE >> 10); /* If the LUN size is just over 2^32 blocks, limit the * filesystem size to 2^32-1 blocks to avoid problems with * ldiskfs/mkfs not handling this size. Bug 22906 */ @@ -598,43 +730,69 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) long inode_size = 0; /* Journal size in MB */ - if (strstr(mop->mo_mkfsopts, "-J") == NULL) { + if (strstr(mop->mo_mkfsopts, "-J") == NULL && + device_kb > 1024 * 1024) { /* Choose our own default journal size */ - long journal_sz = 0, max_sz; - if (device_sz > 1024 * 1024) /* 1GB */ - journal_sz = (device_sz / 102400) * 4; - /* cap journal size at 1GB */ - if (journal_sz > 1024L) - journal_sz = 1024L; - /* man mkfs.ext3 */ - max_sz = (102400 * L_BLOCK_SIZE) >> 20; /* 400MB */ - if (journal_sz > max_sz) - journal_sz = max_sz; - if (journal_sz) { - sprintf(buf, " -J size=%ld", journal_sz); + long journal_mb = 0, max_mb; + + /* cap journal size at 4GB for MDT, + * leave it at 400MB for OSTs. */ + if (IS_MDT(&mop->mo_ldd)) + max_mb = 4096; + else if (IS_OST(&mop->mo_ldd)) + max_mb = 400; + else /* Use mke2fs default size for MGS */ + max_mb = 0; + + /* Use at most 4% of device for journal */ + journal_mb = device_kb * 4 / (1024 * 100); + if (journal_mb > max_mb) + journal_mb = max_mb; + + if (journal_mb) { + sprintf(buf, " -J size=%ld", journal_mb); strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); } } - /* Inode size (for extended attributes). The LOV EA size is - * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data), - * and we want some margin above that for ACLs, other EAs... */ + /* + * The inode size is constituted by following elements + * (assuming all files are in composite layout and has + * 3 components): + * + * ldiskfs inode size: 156 + * extended attributes size, including: + * ext4_xattr_header: 32 + * LOV EA size: 32(lov_comp_md_v1) + + * 3 * 40(lov_comp_md_entry_v1) + + * 3 * 32(lov_mds_md) + + * stripes * 24(lov_ost_data) + + * 16(xattr_entry) + 3(lov) + * LMA EA size: 24(lustre_mdt_attrs) + + * 16(xattr_entry) + 3(lma) + * link EA size: 24(link_ea_header) + 18(link_ea_entry) + + * (filename) + 16(xattr_entry) + 4(link) + * and some margin for 4-byte alignment, ACLs and other EAs. + * + * If we say the average filename length is about 32 bytes, + * the calculation looks like: + * 156 + 32 + (32+3*(40 + 32)+24*N+19) + (24+19) + + * (24+18+~32+20) + other <= 512*2^m, {m=0,1,2,3} + */ if (strstr(mop->mo_mkfsopts, "-I") == NULL) { if (IS_MDT(&mop->mo_ldd)) { - if (mop->mo_stripe_count > 72) + if (mop->mo_stripe_count > 59) inode_size = 512; /* bz 7241 */ /* see also "-i" below for EA blocks */ - else if (mop->mo_stripe_count > 32) + else if (mop->mo_stripe_count > 16) inode_size = 2048; - else if (mop->mo_stripe_count > 10) - inode_size = 1024; else - inode_size = 512; + inode_size = 1024; } else if (IS_OST(&mop->mo_ldd)) { - /* We store MDS FID and OST objid in EA on OST - * we need to make inode bigger as well. */ - inode_size = 256; + /* We store MDS FID and necessary composite + * layout information in the OST object EA. */ + inode_size = 512; } if (inode_size > 0) { @@ -657,7 +815,7 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) if (IS_MDT(&mop->mo_ldd)) { bytes_per_inode = inode_size + 1536; - if (mop->mo_stripe_count > 72) { + if (mop->mo_stripe_count > 59) { int extra = mop->mo_stripe_count * 24; extra = ((extra - 1) | 4095) + 1; bytes_per_inode += extra; @@ -669,18 +827,18 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) * this, but it is impossible to know in advance. */ if (IS_OST(&mop->mo_ldd)) { /* OST > 16TB assume average file size 1MB */ - if (device_sz > (16ULL << 30)) + if (device_kb > (16ULL << 30)) bytes_per_inode = 1024 * 1024; /* OST > 4TB assume average file size 512kB */ - else if (device_sz > (4ULL << 30)) + else if (device_kb > (4ULL << 30)) bytes_per_inode = 512 * 1024; /* OST > 1TB assume average file size 256kB */ - else if (device_sz > (1ULL << 30)) + else if (device_kb > (1ULL << 30)) bytes_per_inode = 256 * 1024; /* OST > 10GB assume average file size 64kB, * plus a bit so that inodes will fit into a * 256x flex_bg without overflowing */ - else if (device_sz > (10ULL << 20)) + else if (device_kb > (10ULL << 20)) bytes_per_inode = 69905; } @@ -741,8 +899,9 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) * descriptor blocks, but leave one block for the superblock. * Only useful for filesystems with < 2^32 blocks due to resize * limitations. */ - if (IS_OST(&mop->mo_ldd) && mop->mo_device_sz > 100 * 1024 && - mop->mo_device_sz * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) { + if (strstr(mop->mo_mkfsopts, "meta_bg") == NULL && + IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 && + mop->mo_device_kb * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) { unsigned group_blocks = L_BLOCK_SIZE * 8; unsigned desc_per_block = L_BLOCK_SIZE / 32; unsigned resize_blks; @@ -781,8 +940,8 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) vprint("formatting backing filesystem %s on %s\n", MT_STR(&mop->mo_ldd), dev); - vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname); - vprint("\t4k blocks "LPU64"\n", block_count); + vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname); + vprint("\t4k blocks %ju\n", (uintmax_t)block_count); vprint("\toptions %s\n", mop->mo_mkfsopts); /* mkfs_cmd's trailing space is important! */ @@ -790,7 +949,8 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) strscat(mkfs_cmd, " ", sizeof(mkfs_cmd)); strscat(mkfs_cmd, dev, sizeof(mkfs_cmd)); if (block_count != 0) { - sprintf(buf, " "LPU64, block_count); + snprintf(buf, sizeof(buf), " %ju", + (uintmax_t)block_count); strscat(mkfs_cmd, buf, sizeof(mkfs_cmd)); } @@ -804,8 +964,7 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) } int ldiskfs_prepare_lustre(struct mkfs_opts *mop, - char *default_mountopts, int default_len, - char *always_mountopts, int always_len) + char *wanted_mountopts, size_t len) { struct lustre_disk_data *ldd = &mop->mo_ldd; int ret; @@ -818,14 +977,21 @@ int ldiskfs_prepare_lustre(struct mkfs_opts *mop, mop->mo_flags |= MO_IS_LOOP; } - strscat(default_mountopts, ",errors=remount-ro", default_len); if (IS_MDT(ldd) || IS_MGS(ldd)) - strscat(always_mountopts, ",user_xattr", always_len); + strscat(wanted_mountopts, ",user_xattr", len); return 0; } -int read_file(const char *path, char *buf, int size) +int ldiskfs_fix_mountopts(struct mkfs_opts *mop, char *mountopts, size_t len) +{ + if (strstr(mountopts, "errors=") == NULL) + strscat(mountopts, ",errors=remount-ro", len); + + return 0; +} + +static int read_file(const char *path, char *buf, int size) { FILE *fd; @@ -833,32 +999,38 @@ int read_file(const char *path, char *buf, int size) if (fd == NULL) return errno; - /* should not ignore fgets(3)'s return value */ - if (!fgets(buf, size, fd)) { + if (fgets(buf, size, fd) == NULL) { fprintf(stderr, "reading from %s: %s", path, strerror(errno)); fclose(fd); return 1; } fclose(fd); + + /* strip trailing newline */ + size = strlen(buf); + if (buf[size - 1] == '\n') + buf[size - 1] = '\0'; + return 0; } -int write_file(const char *path, const char *buf) +static int write_file(const char *path, const char *buf) { - FILE *fd; + int fd, rc; - fd = fopen(path, "w"); - if (fd == NULL) + fd = open(path, O_WRONLY); + if (fd < 0) return errno; - fputs(buf, fd); - fclose(fd); - return 0; + rc = write(fd, buf, strlen(buf)); + close(fd); + + return rc < 0 ? errno : 0; } -int set_blockdev_scheduler(const char *path, const char *scheduler) +static int set_blockdev_scheduler(const char *path, const char *scheduler) { - char buf[PATH_MAX], *c; + char buf[PATH_MAX], *s, *e, orig_sched[50]; int rc; /* Before setting the scheduler, we need to check to see if it's @@ -875,18 +1047,21 @@ int set_blockdev_scheduler(const char *path, const char *scheduler) } /* The expected format of buf: noop anticipatory deadline [cfq] */ - c = strchr(buf, '['); + s = strchr(buf, '['); + e = strchr(buf, ']'); - /* If c is NULL, the format is not what we expect. Play it safe - * and error out. */ - if (c == NULL) { + /* If the format is not what we expect. Play it safe and error out. */ + if (s == NULL || e == NULL) { if (verbose) fprintf(stderr, "%s: cannot parse scheduler " "options for '%s'\n", progname, path); return -EINVAL; } - if (strncmp(c+1, "noop", 4) == 0) + snprintf(orig_sched, e - s, "%s", s + 1); + + if (strcmp(orig_sched, "noop") == 0 || + strcmp(orig_sched, scheduler) == 0) return 0; rc = write_file(path, scheduler); @@ -896,6 +1071,9 @@ int set_blockdev_scheduler(const char *path, const char *scheduler) "'%s': %s\n", progname, path, strerror(errno)); return rc; + } else { + fprintf(stderr, "%s: change scheduler of %s from %s to %s\n", + progname, path, orig_sched, scheduler); } return rc; @@ -904,7 +1082,7 @@ int set_blockdev_scheduler(const char *path, const char *scheduler) /* This is to tune the kernel for good SCSI performance. * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -int set_blockdev_tunables(char *source, struct mount_opts *mop) +static int set_blockdev_tunables(char *source, struct mount_opts *mop) { glob_t glob_info = { 0 }; struct stat stat_buf; @@ -1021,7 +1199,7 @@ set_params: snprintf(buf, sizeof(buf), "%d", mop->mo_md_stripe_cache_size); rc = write_file(real_path, buf); - if (rc && verbose) + if (rc != 0 && verbose) fprintf(stderr, "warning: opening %s: %s\n", real_path, strerror(errno)); } @@ -1029,32 +1207,69 @@ set_params: return rc; } - snprintf(real_path, sizeof(real_path), "%s/%s", path, - MAX_HW_SECTORS_KB_PATH); - rc = read_file(real_path, buf, sizeof(buf)); - if (rc) { - if (verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - /* No MAX_HW_SECTORS_KB_PATH isn't necessary an - * error for some device. */ - rc = 0; + if (mop->mo_max_sectors_kb >= 0) { + snprintf(buf, sizeof(buf), "%d", mop->mo_max_sectors_kb); + } else { + snprintf(real_path, sizeof(real_path), "%s/%s", path, + MAX_HW_SECTORS_KB_PATH); + rc = read_file(real_path, buf, sizeof(buf)); + if (rc) { + if (verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + /* No MAX_HW_SECTORS_KB_PATH isn't necessary an + * error for some device. */ + goto subdevs; + } } if (strlen(buf) - 1 > 0) { + char oldbuf[32] = "", *end = NULL; + unsigned long long oldval, newval; + snprintf(real_path, sizeof(real_path), "%s/%s", path, MAX_SECTORS_KB_PATH); + rc = read_file(real_path, oldbuf, sizeof(oldbuf)); + /* Only set new parameter if different from the old one. */ + if (rc != 0 || strcmp(oldbuf, buf) == 0) { + /* No MAX_SECTORS_KB_PATH isn't necessary an + * error for some device. */ + goto subdevs; + } + + newval = strtoull(buf, &end, 0); + if (newval == 0 || newval == ULLONG_MAX || end == buf) + goto subdevs; + + /* Don't increase IO request size limit past 16MB. It is about + * PTLRPC_MAX_BRW_SIZE, but that isn't in a public header. + * Note that even though the block layer allows larger values, + * setting max_sectors_kb = 32768 causes crashes (LU-6974). */ + if (mop->mo_max_sectors_kb < 0 && newval > 16 * 1024) { + newval = 16 * 1024; + snprintf(buf, sizeof(buf), "%llu", newval); + } + + oldval = strtoull(oldbuf, &end, 0); + /* Don't shrink the current limit. */ + if (mop->mo_max_sectors_kb < 0 && oldval != ULLONG_MAX && + newval <= oldval) + goto subdevs; + rc = write_file(real_path, buf); - if (rc) { + if (rc != 0) { if (verbose) fprintf(stderr, "warning: writing to %s: %s\n", real_path, strerror(errno)); /* No MAX_SECTORS_KB_PATH isn't necessary an * error for some device. */ - rc = 0; + goto subdevs; } + fprintf(stderr, "%s: increased %s from %s to %s\n", + progname, real_path, oldbuf, buf); } +subdevs: /* Purposely ignore errors reported from set_blockdev_scheduler. * The worst that will happen is a block device with an "incorrect" * scheduler. */ @@ -1112,116 +1327,67 @@ int ldiskfs_label_lustre(struct mount_opts *mop) return rc; } -/* return canonicalized absolute pathname, even if the target file does not - * exist, unlike realpath */ -static char *absolute_path(char *devname) -{ - char buf[PATH_MAX + 1]; - char *path; - char *ptr; - - path = malloc(PATH_MAX + 1); - if (path == NULL) - return NULL; - - if (devname[0] != '/') { - if (getcwd(buf, sizeof(buf) - 1) == NULL) { - free(path); - return NULL; - } - strcat(buf, "/"); - if (strlen(devname) > sizeof(buf)-strlen(buf)-1) { - free(path); - return NULL; - } - strncat(buf, devname, sizeof(buf)-strlen(buf)-1); - } else { - if (strlen(devname) > sizeof(buf)-1) { - free(path); - return NULL; - } - strncpy(buf, devname, sizeof(buf)); - } - /* truncate filename before calling realpath */ - ptr = strrchr(buf, '/'); - if (ptr == NULL) { - free(path); - return NULL; - } - *ptr = '\0'; - if (path != realpath(buf, path)) { - free(path); - return NULL; - } - /* add the filename back */ - strcat(path, "/"); - strcat(path, ptr + 1); - return path; -} - -/* Determine if a device is a block device (as opposed to a file) */ -int is_block(char* devname) +int ldiskfs_rename_fsname(struct mkfs_opts *mop, const char *oldname) { - struct stat st; - int ret = 0; - char *devpath; + struct mount_opts opts; + struct lustre_disk_data *ldd = &mop->mo_ldd; + char mntpt[] = "/tmp/mntXXXXXX"; + char *dev; + int ret; - devpath = absolute_path(devname); - if (devpath == NULL) { - fprintf(stderr, "%s: failed to resolve path to %s\n", - progname, devname); - return -1; + /* Change the filesystem label. */ + opts.mo_ldd = *ldd; + opts.mo_source = mop->mo_device; + ret = ldiskfs_label_lustre(&opts); + if (ret) { + if (errno != 0) + ret = errno; + fprintf(stderr, "Can't change filesystem label: %s\n", + strerror(ret)); + return ret; } - ret = access(devname, F_OK); - if (ret != 0) { - if (strncmp(devpath, "/dev/", 5) == 0) { - /* nobody sane wants to create a loopback file under - * /dev. Let's just report the device doesn't exist */ - fprintf(stderr, "%s: %s apparently does not exist\n", - progname, devpath); - ret = -1; - goto out; - } - ret = 0; - goto out; - } - ret = stat(devpath, &st); - if (ret != 0) { - fprintf(stderr, "%s: cannot stat %s\n", progname, devpath); - goto out; + /* Mount this device temporarily in order to write these files */ + if (mkdtemp(mntpt) == NULL) { + if (errno != 0) + ret = errno; + else + ret = EINVAL; + fprintf(stderr, "Can't create temp mount point %s: %s\n", + mntpt, strerror(ret)); + return ret; } - ret = S_ISBLK(st.st_mode); -out: - free(devpath); - return ret; -} -static int is_feature_enabled(const char *feature, const char *devpath) -{ - char cmd[PATH_MAX]; - FILE *fp; - char enabled_features[4096] = ""; - int ret = 1; - - snprintf(cmd, sizeof(cmd), "%s -R features %s 2>&1", - DEBUGFS, devpath); +#ifdef HAVE_SELINUX + /* + * Append file context to mount options if SE Linux is enabled + */ + if (is_selinux_enabled() > 0) + append_context_for_mount(mntpt, mop); +#endif - /* Using popen() instead of run_command() since debugfs does - * not return proper error code if command is not supported */ - fp = popen(cmd, "r"); - if (!fp) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return 0; + if (mop->mo_flags & MO_IS_LOOP) + dev = mop->mo_loopdev; + else + dev = mop->mo_device; + ret = mount(dev, mntpt, MT_STR(ldd), 0, ldd->ldd_mount_opts); + if (ret) { + if (errno != 0) + ret = errno; + fprintf(stderr, "Unable to mount %s: %s\n", + dev, strerror(ret)); + if (ret == ENODEV) + fprintf(stderr, "Is the %s module available?\n", + MT_STR(ldd)); + goto out_rmdir; } - ret = fread(enabled_features, 1, sizeof(enabled_features) - 1, fp); - enabled_features[ret] = '\0'; - fclose(fp); + ret = lustre_rename_fsname(mop, mntpt, oldname); + umount(mntpt); - if (strstr(enabled_features, feature)) - return 1; - return 0; +out_rmdir: + rmdir(mntpt); + return ret; } /* Enable quota accounting */