X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Flibmount_utils_ldiskfs.c;h=c1f79ae2df1fdbeb6a207007e05419ec6caeca33;hb=2f8d7b4679de3fa467040aa61733f262714e39c9;hp=c6ece221ec748691e7df2259c55edb6522b1753c;hpb=6712478e79588e73e28c7ccac3afc7ac2368a4f3;p=fs%2Flustre-release.git diff --git a/lustre/utils/libmount_utils_ldiskfs.c b/lustre/utils/libmount_utils_ldiskfs.c index c6ece22..c1f79ae 100644 --- a/lustre/utils/libmount_utils_ldiskfs.c +++ b/lustre/utils/libmount_utils_ldiskfs.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2016, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -49,14 +49,15 @@ #include #include #include +#include #include #include -#include #include #include #include #include +#include #include #include @@ -66,10 +67,12 @@ #ifndef BLKGETSIZE64 #include /* for BLKGETSIZE64 */ #endif +#include #include #include #include #include +#include #ifdef HAVE_SELINUX #include @@ -120,57 +123,6 @@ static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop) } #endif -/* return canonicalized absolute pathname, even if the target file does not - * exist, unlike realpath */ -static char *absolute_path(char *devname) -{ - char buf[PATH_MAX + 1] = ""; - char *path; - char *ptr; - int len; - - path = malloc(sizeof(buf)); - if (path == NULL) - return NULL; - - if (devname[0] != '/') { - if (getcwd(buf, sizeof(buf) - 1) == NULL) { - free(path); - return NULL; - } - len = snprintf(path, sizeof(buf), "%s/%s", buf, devname); - if (len >= sizeof(buf)) { - free(path); - return NULL; - } - } else { - len = snprintf(path, sizeof(buf), "%s", devname); - if (len >= sizeof(buf)) { - free(path); - return NULL; - } - } - - /* truncate filename before calling realpath */ - ptr = strrchr(path, '/'); - if (ptr == NULL) { - free(path); - return NULL; - } - *ptr = '\0'; - if (buf != realpath(path, buf)) { - free(path); - return NULL; - } - /* add the filename back */ - len = snprintf(path, PATH_MAX, "%s/%s", buf, ptr+1); - if (len >= PATH_MAX) { - free(path); - return NULL; - } - return path; -} - /* Determine if a device is a block device (as opposed to a file) */ static int is_block(char *devname) { @@ -178,10 +130,10 @@ static int is_block(char *devname) int ret = 0; char *devpath; - devpath = absolute_path(devname); - if (devpath == NULL) { - fprintf(stderr, "%s: failed to resolve path to %s\n", - progname, devname); + ret = cfs_abs_path(devname, &devpath); + if (ret != 0) { + fprintf(stderr, "%s: failed to resolve path '%s': %s\n", + progname, devname, strerror(-ret)); return -1; } @@ -240,7 +192,7 @@ static int is_feature_enabled(const char *feature, const char *devpath) int ldiskfs_write_ldd(struct mkfs_opts *mop) { char mntpt[] = "/tmp/mntXXXXXX"; - char filepnm[128]; + char filepnm[192]; char *dev; FILE *filep; int ret = 0; @@ -436,7 +388,7 @@ static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) E2FSPROGS, feature); #if !(HAVE_LDISKFSPROGS) fprintf(stderr, "Please install the latest version of e2fsprogs from\n" - "https://downloads.hpdd.intel.com/public/e2fsprogs/latest/\n" + "https://downloads.whamcloud.com/public/e2fsprogs/latest/\n" "to enable this feature.\n"); #endif if (make_backfs) @@ -590,6 +542,13 @@ static void append_unique(char *buf, char *prefix, char *key, char *val, static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, size_t maxbuflen, int user_spec) { + int enable_64bit = 0; + + /* Enable large block addresses if the LUN is over 2^32 blocks. */ + if ((mop->mo_device_kb / (L_BLOCK_SIZE >> 10) > UINT32_MAX) && + is_e2fsprogs_feature_supp("-O 64bit") == 0) + enable_64bit = 1; + if (IS_OST(&mop->mo_ldd)) { append_unique(anchor, user_spec ? "," : " -O ", "extents", NULL, maxbuflen); @@ -598,7 +557,10 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, append_unique(anchor, user_spec ? "," : " -O ", "dirdata", NULL, maxbuflen); append_unique(anchor, ",", "uninit_bg", NULL, maxbuflen); - append_unique(anchor, ",", "^extents", NULL, maxbuflen); + if (enable_64bit) + append_unique(anchor, ",", "extents", NULL, maxbuflen); + else + append_unique(anchor, ",", "^extents", NULL, maxbuflen); } else { append_unique(anchor, user_spec ? "," : " -O ", "uninit_bg", NULL, maxbuflen); @@ -635,9 +597,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, if (is_e2fsprogs_feature_supp("-O huge_file") == 0) append_unique(anchor, ",", "huge_file", NULL, maxbuflen); - /* Enable large block addresses if the LUN is over 2^32 blocks. */ - if (mop->mo_device_kb / (L_BLOCK_SIZE >> 10) >= 0x100002000ULL && - is_e2fsprogs_feature_supp("-O 64bit") == 0) + if (enable_64bit) append_unique(anchor, ",", "64bit", NULL, maxbuflen); /* Cluster inode/block bitmaps and inode table for more efficient IO. @@ -779,24 +739,26 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) * (assuming all files are in composite layout and has * 3 components): * - * ldiskfs inode size: 156 - * extended attributes size, including: + * ldiskfs inode size: 160 + * MDT extended attributes size, including: * ext4_xattr_header: 32 * LOV EA size: 32(lov_comp_md_v1) + * 3 * 40(lov_comp_md_entry_v1) + * 3 * 32(lov_mds_md) + * stripes * 24(lov_ost_data) + - * 16(xattr_entry) + 3(lov) + * 16(xattr_entry) + 4("lov") * LMA EA size: 24(lustre_mdt_attrs) + - * 16(xattr_entry) + 3(lma) + * 16(xattr_entry) + 4("lma") + * SOM EA size: 24(lustre_som_attrs) + + * 16(xattr_entry) + 4("som") * link EA size: 24(link_ea_header) + 18(link_ea_entry) + - * (filename) + 16(xattr_entry) + 4(link) + * 16(filename) + 16(xattr_entry) + 4("link") * and some margin for 4-byte alignment, ACLs and other EAs. * * If we say the average filename length is about 32 bytes, * the calculation looks like: - * 156 + 32 + (32+3*(40 + 32)+24*N+19) + (24+19) + - * (24+18+~32+20) + other <= 512*2^m, {m=0,1,2,3} + * 160 + 32 + (32+3*(40+32)+24*stripes+20) + (24+20) + (24+20) + + * (24+20) + (~42+16+20) + other <= 512*2^m, {m=0,1,2,3} */ if (strstr(mop->mo_mkfsopts, "-I") == NULL) { if (IS_MDT(&mop->mo_ldd)) { @@ -809,7 +771,16 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) inode_size = 1024; } else if (IS_OST(&mop->mo_ldd)) { /* We store MDS FID and necessary composite - * layout information in the OST object EA. */ + * layout information in the OST object EA: + * ldiskfs inode size: 160 + * OST extended attributes size, including: + * ext4_xattr_header: 32 + * LMA EA size: 24(lustre_mdt_attrs) + + * 16(xattr_entry) + 4("lma") + * FID EA size: 52(filter_fid) + + * 16(xattr_entry) + 4("fid") + * 160 + 32 + (24+20) + (52+20) = 308 + */ inode_size = 512; } @@ -864,6 +835,7 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) sprintf(buf, " -i %ld", bytes_per_inode); strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); + mop->mo_inode_size = bytes_per_inode; } } @@ -1046,290 +1018,307 @@ static int write_file(const char *path, const char *buf) return rc < 0 ? errno : 0; } -static int set_blockdev_scheduler(const char *path, const char *scheduler) +static int tune_md_stripe_cache_size(const char *sys_path, + struct mount_opts *mop) { - char buf[PATH_MAX], *s, *e, orig_sched[50]; + char path[PATH_MAX]; + unsigned long old_stripe_cache_size; + unsigned long new_stripe_cache_size; + char buf[3 * sizeof(old_stripe_cache_size) + 2]; int rc; - /* Before setting the scheduler, we need to check to see if it's - * already set to "noop". If it is, we don't want to override - * that setting. If it's set to anything other than "noop", set - * the scheduler to what has been passed in. */ + if (mop->mo_md_stripe_cache_size <= 0) + return 0; + + new_stripe_cache_size = mop->mo_md_stripe_cache_size; + snprintf(path, sizeof(path), "%s/%s", sys_path, STRIPE_CACHE_SIZE); rc = read_file(path, buf, sizeof(buf)); - if (rc) { + if (rc != 0) { if (verbose) - fprintf(stderr, "%s: cannot open '%s': %s\n", - progname, path, strerror(errno)); + fprintf(stderr, "warning: cannot read '%s': %s\n", + path, strerror(errno)); return rc; } - /* The expected format of buf: noop anticipatory deadline [cfq] */ - s = strchr(buf, '['); - e = strchr(buf, ']'); - - /* If the format is not what we expect. Play it safe and error out. */ - if (s == NULL || e == NULL) { - if (verbose) - fprintf(stderr, "%s: cannot parse scheduler " - "options for '%s'\n", progname, path); - return -EINVAL; - } - - snprintf(orig_sched, e - s, "%s", s + 1); + old_stripe_cache_size = strtoul(buf, NULL, 0); + if (old_stripe_cache_size == 0 || old_stripe_cache_size == ULONG_MAX) + return EINVAL; - if (strcmp(orig_sched, "noop") == 0 || - strcmp(orig_sched, scheduler) == 0) + if (new_stripe_cache_size <= old_stripe_cache_size) return 0; - rc = write_file(path, scheduler); - if (rc) { + snprintf(buf, sizeof(buf), "%lu", new_stripe_cache_size); + rc = write_file(path, buf); + if (rc != 0) { if (verbose) - fprintf(stderr, "%s: cannot set scheduler on " - "'%s': %s\n", progname, path, - strerror(errno)); + fprintf(stderr, "warning: cannot write '%s': %s\n", + path, strerror(errno)); return rc; - } else { - fprintf(stderr, "%s: change scheduler of %s from %s to %s\n", - progname, path, orig_sched, scheduler); } - return rc; + return 0; } -/* This is to tune the kernel for good SCSI performance. - * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb - * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -static int set_blockdev_tunables(char *source, struct mount_opts *mop) +static int tune_max_sectors_kb(const char *sys_path, struct mount_opts *mop) { - glob_t glob_info = { 0 }; - struct stat stat_buf; - char *chk_major, *chk_minor; - char *savept = NULL, *dev; - char *ret_path; - char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'}; - char real_path[PATH_MAX] = {'\0'}; - int i, rc = 0; - int major, minor; - char *slave = NULL; - - if (!source) - return -EINVAL; - - ret_path = realpath(source, real_path); - if (ret_path == NULL) { - if (verbose) - fprintf(stderr, "warning: %s: cannot resolve: %s\n", - source, strerror(errno)); - return -EINVAL; + char path[PATH_MAX]; + unsigned long max_hw_sectors_kb; + unsigned long old_max_sectors_kb; + unsigned long new_max_sectors_kb; + char buf[3 * sizeof(old_max_sectors_kb) + 2]; + int rc; + + if (mop->mo_max_sectors_kb >= 0) { + new_max_sectors_kb = mop->mo_max_sectors_kb; + goto have_new_max_sectors_kb; } - if (strncmp(real_path, "/dev/loop", 9) == 0) + snprintf(path, sizeof(path), "%s/%s", sys_path, MAX_HW_SECTORS_KB_PATH); + rc = read_file(path, buf, sizeof(buf)); + if (rc != 0) { + /* No MAX_HW_SECTORS_KB_PATH isn't necessary an + * error for some devices. */ return 0; + } - if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL)) + max_hw_sectors_kb = strtoul(buf, NULL, 0); + if (max_hw_sectors_kb == 0 || max_hw_sectors_kb == ULLONG_MAX) { + /* No digits at all or something weird. */ return 0; + } - snprintf(path, sizeof(path), "/sys/block%s", real_path + 4); - if (access(path, X_OK) == 0) - goto set_params; + new_max_sectors_kb = max_hw_sectors_kb; - /* The name of the device say 'X' specified in /dev/X may not - * match any entry under /sys/block/. In that case we need to - * match the major/minor number to find the entry under - * sys/block corresponding to /dev/X */ + /* Don't increase IO request size limit past 16MB. It is + * about PTLRPC_MAX_BRW_SIZE, but that isn't in a public + * header. Note that even though the block layer allows + * larger values, setting max_sectors_kb = 32768 causes + * crashes (LU-6974). */ + if (new_max_sectors_kb > 16 * 1024) + new_max_sectors_kb = 16 * 1024; - /* Don't chop tail digit on /dev/mapper/xxx, LU-478 */ - if (strncmp(real_path, "/dev/mapper", 11) != 0) { - dev = real_path + strlen(real_path); - while (--dev > real_path && isdigit(*dev)) - *dev = 0; +have_new_max_sectors_kb: + snprintf(path, sizeof(path), "%s/%s", sys_path, MAX_SECTORS_KB_PATH); + rc = read_file(path, buf, sizeof(buf)); + if (rc != 0) { + /* No MAX_SECTORS_KB_PATH isn't necessary an error for + * some devices. */ + return 0; + } - if (strncmp(real_path, "/dev/md", 7) == 0 && dev[0] == 'p') - *dev = 0; + old_max_sectors_kb = strtoul(buf, NULL, 0); + if (old_max_sectors_kb == 0 || old_max_sectors_kb == ULLONG_MAX) { + /* No digits at all or something weird. */ + return 0; } - rc = stat(real_path, &stat_buf); - if (rc) { + if (new_max_sectors_kb <= old_max_sectors_kb) + return 0; + + snprintf(buf, sizeof(buf), "%lu", new_max_sectors_kb); + rc = write_file(path, buf); + if (rc != 0) { if (verbose) - fprintf(stderr, "warning: %s, device %s stat failed\n", - strerror(errno), real_path); + fprintf(stderr, "warning: cannot write '%s': %s\n", + path, strerror(errno)); return rc; } - major = major(stat_buf.st_rdev); - minor = minor(stat_buf.st_rdev); - rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info); - if (rc) { + fprintf(stderr, "%s: increased '%s' from %lu to %lu\n", + progname, path, old_max_sectors_kb, new_max_sectors_kb); + + return 0; +} + +static int tune_block_dev_scheduler(const char *sys_path, const char *new_sched) +{ + char path[PATH_MAX]; + char buf[PATH_MAX]; + char *s, *e; + char *old_sched; + int rc; + + /* Before setting the scheduler, we need to check to see if + * it's already set to "noop". If it is then we don't want to + * override that setting. If it's set to anything other than + * "noop" then set the scheduler to what has been passed + * in. */ + + snprintf(path, sizeof(path), "%s/%s", sys_path, SCHEDULER_PATH); + rc = read_file(path, buf, sizeof(buf)); + if (rc != 0) { if (verbose) - fprintf(stderr, "warning: failed to read entries under " - "/sys/block\n"); - globfree(&glob_info); + fprintf(stderr, "%s: cannot read '%s': %s\n", + progname, path, strerror(errno)); + return rc; } - for (i = 0; i < glob_info.gl_pathc; i++){ - snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]); - - rc = read_file(path, buf, sizeof(buf)); - if (rc) - continue; + /* The expected format of buf: noop anticipatory deadline [cfq] */ + s = strchr(buf, '['); + e = strchr(buf, ']'); - if (buf[strlen(buf) - 1] == '\n') - buf[strlen(buf) - 1] = '\0'; + /* If the format is not what we expect then be safe and error out. */ + if (s == NULL || e == NULL || !(s < e)) { + if (verbose) + fprintf(stderr, + "%s: cannot parse scheduler options for '%s'\n", + progname, path); - chk_major = strtok_r(buf, ":", &savept); - chk_minor = savept; - if (chk_major != NULL && major == atoi(chk_major) && - chk_minor != NULL && minor == atoi(chk_minor)) - break; + return EINVAL; } - if (i == glob_info.gl_pathc) { + old_sched = s + 1; + *e = '\0'; + + if (strcmp(old_sched, "noop") == 0 || + strcmp(old_sched, new_sched) == 0) + return 0; + + rc = write_file(path, new_sched); + if (rc != 0) { if (verbose) - fprintf(stderr,"warning: device %s does not match any " - "entry under /sys/block\n", real_path); - globfree(&glob_info); - return -EINVAL; + fprintf(stderr, + "%s: cannot set scheduler on '%s': %s\n", + progname, path, strerror(errno)); + return rc; } - /* Chop off "/dev" from path we found */ - path[strlen(glob_info.gl_pathv[i])] = '\0'; - globfree(&glob_info); + fprintf(stderr, "%s: changed scheduler of '%s' from %s to %s\n", + progname, path, old_sched, new_sched); -set_params: - if (strncmp(real_path, "/dev/md", 7) == 0) { - snprintf(real_path, sizeof(real_path), "%s/%s", path, - STRIPE_CACHE_SIZE); + return 0; +} - rc = read_file(real_path, buf, sizeof(buf)); - if (rc) { - if (verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - return 0; - } +static int tune_block_dev(const char *src, struct mount_opts *mop); - if (atoi(buf) >= mop->mo_md_stripe_cache_size) +static int tune_block_dev_slaves(const char *sys_path, struct mount_opts *mop) +{ + char slaves_path[PATH_MAX]; + DIR *slaves_dir; + struct dirent *d; + int rc = 0; + + snprintf(slaves_path, sizeof(slaves_path), "%s/slaves", sys_path); + slaves_dir = opendir(slaves_path); + if (slaves_dir == NULL) { + if (errno == ENOENT) return 0; - if (strlen(buf) - 1 > 0) { - snprintf(buf, sizeof(buf), "%d", - mop->mo_md_stripe_cache_size); - rc = write_file(real_path, buf); - if (rc != 0 && verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - } - /* Return since raid and disk tunables are different */ - return rc; + return errno; } - if (mop->mo_max_sectors_kb >= 0) { - snprintf(buf, sizeof(buf), "%d", mop->mo_max_sectors_kb); - } else { - snprintf(real_path, sizeof(real_path), "%s/%s", path, - MAX_HW_SECTORS_KB_PATH); - rc = read_file(real_path, buf, sizeof(buf)); - if (rc) { - if (verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - /* No MAX_HW_SECTORS_KB_PATH isn't necessary an - * error for some device. */ - goto subdevs; - } + while ((d = readdir(slaves_dir)) != NULL) { + char path[PATH_MAX]; + int rc2; + + if (d->d_type != DT_LNK) + continue; + + snprintf(path, sizeof(path), "%s/%s", slaves_path, d->d_name); + rc2 = tune_block_dev(path, mop); + if (rc2 != 0) + rc = rc2; } - if (strlen(buf) - 1 > 0) { - char oldbuf[32] = "", *end = NULL; - unsigned long long oldval, newval; - - snprintf(real_path, sizeof(real_path), "%s/%s", path, - MAX_SECTORS_KB_PATH); - rc = read_file(real_path, oldbuf, sizeof(oldbuf)); - /* Only set new parameter if different from the old one. */ - if (rc != 0 || strcmp(oldbuf, buf) == 0) { - /* No MAX_SECTORS_KB_PATH isn't necessary an - * error for some device. */ - goto subdevs; - } + closedir(slaves_dir); - newval = strtoull(buf, &end, 0); - if (newval == 0 || newval == ULLONG_MAX || end == buf) - goto subdevs; - - /* Don't increase IO request size limit past 16MB. It is about - * PTLRPC_MAX_BRW_SIZE, but that isn't in a public header. - * Note that even though the block layer allows larger values, - * setting max_sectors_kb = 32768 causes crashes (LU-6974). */ - if (mop->mo_max_sectors_kb < 0 && newval > 16 * 1024) { - newval = 16 * 1024; - snprintf(buf, sizeof(buf), "%llu", newval); - } + return rc; +} - oldval = strtoull(oldbuf, &end, 0); - /* Don't shrink the current limit. */ - if (mop->mo_max_sectors_kb < 0 && oldval != ULLONG_MAX && - newval <= oldval) - goto subdevs; - - rc = write_file(real_path, buf); - if (rc != 0) { - if (verbose) - fprintf(stderr, "warning: writing to %s: %s\n", - real_path, strerror(errno)); - /* No MAX_SECTORS_KB_PATH isn't necessary an - * error for some device. */ - goto subdevs; - } - fprintf(stderr, "%s: increased %s from %s to %s\n", - progname, real_path, oldbuf, buf); +/* This is to tune the kernel for good SCSI performance. + * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb + * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ +static int tune_block_dev(const char *src, struct mount_opts *mop) +{ + struct stat st; + char sys_path[PATH_MAX]; + char partition_path[PATH_MAX]; + char *real_sys_path = NULL; + int rc; + + /* + * Don't apply block device tuning for MDT or MGT devices, + * since we don't need huge IO sizes to get good performance + */ + if (!IS_OST(&mop->mo_ldd)) + return 0; + + if (src == NULL) + return EINVAL; + + rc = stat(src, &st); + if (rc < 0) { + if (verbose) + fprintf(stderr, "warning: cannot stat '%s': %s\n", + src, strerror(errno)); + return errno; } -subdevs: - /* Purposely ignore errors reported from set_blockdev_scheduler. - * The worst that will happen is a block device with an "incorrect" - * scheduler. */ - snprintf(real_path, sizeof(real_path), "%s/%s", path, SCHEDULER_PATH); - set_blockdev_scheduler(real_path, DEFAULT_SCHEDULER); - - /* if device is multipath device, tune its slave devices */ - glob_info.gl_pathc = 0; - glob_info.gl_offs = 0; - snprintf(real_path, sizeof(real_path), "%s/slaves/*", path); - rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info); - - for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++) { - slave = basename(glob_info.gl_pathv[i]); - snprintf(real_path, sizeof(real_path), "/dev/%s", slave); - rc = set_blockdev_tunables(real_path, mop); + if (!S_ISBLK(st.st_mode)) + return 0; + + if (major(st.st_rdev) == LOOP_MAJOR) + return 0; + + snprintf(sys_path, sizeof(sys_path), "/sys/dev/block/%u:%u", + major(st.st_rdev), minor(st.st_rdev)); + + snprintf(partition_path, sizeof(partition_path), "%s/partition", + sys_path); + + rc = access(partition_path, F_OK); + if (rc < 0) { + if (errno == ENOENT) + goto have_whole_dev; + + if (verbose) + fprintf(stderr, "warning: cannot access '%s': %s\n", + partition_path, strerror(errno)); + rc = errno; + goto out; } - if (rc == GLOB_NOMATCH) { - /* no slave device is not an error */ - rc = 0; - } else if (rc && verbose) { - if (slave == NULL) { - fprintf(stderr, "warning: %s, failed to read" - " entries under %s/slaves\n", - strerror(errno), path); - } else { - fprintf(stderr, "unable to set tunables for" - " slave device %s (slave would be" - " unable to handle IO request from" - " master %s)\n", - real_path, source); - } + snprintf(sys_path, sizeof(sys_path), "/sys/dev/block/%u:%u/..", + major(st.st_rdev), minor(st.st_rdev)); + +have_whole_dev: + /* Since we recurse on slave devices we resolve the sys_path to + * avoid path buffer overflows. */ + real_sys_path = realpath(sys_path, NULL); + if (real_sys_path == NULL) { + if (verbose) + fprintf(stderr, + "warning: cannot resolve '%s': %s\n", + sys_path, strerror(errno)); + rc = errno; + goto out; } - globfree(&glob_info); + + if (major(st.st_rdev) == MD_MAJOR) { + rc = tune_md_stripe_cache_size(real_sys_path, mop); + } else { + /* Ignore errors from tune_max_sectors_kb() and + * tune_scheduler(). The worst that will happen is a block + * device with an "incorrect" scheduler. */ + tune_max_sectors_kb(real_sys_path, mop); + tune_block_dev_scheduler(real_sys_path, DEFAULT_SCHEDULER); + + /* If device is multipath device then tune its slave + * devices. */ + rc = tune_block_dev_slaves(real_sys_path, mop); + } + +out: + free(real_sys_path); return rc; } int ldiskfs_tune_lustre(char *dev, struct mount_opts *mop) { - return set_blockdev_tunables(dev, mop); + return tune_block_dev(dev, mop); } int ldiskfs_label_lustre(struct mount_opts *mop)