X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmount_utils_ldiskfs.c;h=6df4e45fddbae282f4fff106c40a1b05ad612874;hp=b8efddc1ef73dcbc56effde4cef5a4a7bb2376f3;hb=4df63615669a69b51c752cc4e416f705f8a56197;hpb=cc5ef6ae5412c3e94061d949ef684036eb003f27 diff --git a/lustre/utils/mount_utils_ldiskfs.c b/lustre/utils/mount_utils_ldiskfs.c index b8efddc..6df4e45 100644 --- a/lustre/utils/mount_utils_ldiskfs.c +++ b/lustre/utils/mount_utils_ldiskfs.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -80,10 +80,17 @@ #include #include +#ifdef HAVE_SELINUX +#include +#endif + #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" +#define SCHEDULER_PATH "queue/scheduler" #define STRIPE_CACHE_SIZE "md/stripe_cache_size" +#define DEFAULT_SCHEDULER "deadline" + extern char *progname; #define L_BLOCK_SIZE 4096 @@ -91,6 +98,145 @@ extern char *progname; #define DUMMY_FILE_NAME_LEN 25 #define EXT3_DIRENT_SIZE DUMMY_FILE_NAME_LEN +/* + * Concatenate context of the temporary mount point iff selinux is enabled + */ +#ifdef HAVE_SELINUX +static void append_context_for_mount(char *mntpt, struct mkfs_opts *mop) +{ + security_context_t fcontext; + + if (getfilecon(mntpt, &fcontext) < 0) { + /* Continuing with default behaviour */ + fprintf(stderr, "%s: Get file context failed : %s\n", + progname, strerror(errno)); + return; + } + + if (fcontext != NULL) { + strcat(mop->mo_ldd.ldd_mount_opts, ",context="); + strcat(mop->mo_ldd.ldd_mount_opts, fcontext); + freecon(fcontext); + } +} +#endif + +/* return canonicalized absolute pathname, even if the target file does not + * exist, unlike realpath */ +static char *absolute_path(char *devname) +{ + char buf[PATH_MAX + 1] = ""; + char *path; + char *ptr; + int len; + + path = malloc(sizeof(buf)); + if (path == NULL) + return NULL; + + if (devname[0] != '/') { + if (getcwd(buf, sizeof(buf) - 1) == NULL) { + free(path); + return NULL; + } + len = snprintf(path, sizeof(buf), "%s/%s", buf, devname); + if (len >= sizeof(buf)) { + free(path); + return NULL; + } + } else { + len = snprintf(path, sizeof(buf), "%s", devname); + if (len >= sizeof(buf)) { + free(path); + return NULL; + } + } + + /* truncate filename before calling realpath */ + ptr = strrchr(path, '/'); + if (ptr == NULL) { + free(path); + return NULL; + } + *ptr = '\0'; + if (buf != realpath(path, buf)) { + free(path); + return NULL; + } + /* add the filename back */ + len = snprintf(path, PATH_MAX, "%s/%s", buf, ptr+1); + if (len >= PATH_MAX) { + free(path); + return NULL; + } + return path; +} + +/* Determine if a device is a block device (as opposed to a file) */ +static int is_block(char *devname) +{ + struct stat st; + int ret = 0; + char *devpath; + + devpath = absolute_path(devname); + if (devpath == NULL) { + fprintf(stderr, "%s: failed to resolve path to %s\n", + progname, devname); + return -1; + } + + ret = access(devname, F_OK); + if (ret != 0) { + if (strncmp(devpath, "/dev/", 5) == 0) { + /* nobody sane wants to create a loopback file under + * /dev. Let's just report the device doesn't exist */ + fprintf(stderr, "%s: %s apparently does not exist\n", + progname, devpath); + ret = -1; + goto out; + } + ret = 0; + goto out; + } + ret = stat(devpath, &st); + if (ret != 0) { + fprintf(stderr, "%s: cannot stat %s\n", progname, devpath); + goto out; + } + ret = S_ISBLK(st.st_mode); +out: + free(devpath); + return ret; +} + +static int is_feature_enabled(const char *feature, const char *devpath) +{ + char cmd[PATH_MAX]; + FILE *fp; + char enabled_features[4096] = ""; + int ret = 1; + + snprintf(cmd, sizeof(cmd), "%s -R features %s 2>&1", + DEBUGFS, devpath); + + /* Using popen() instead of run_command() since debugfs does + * not return proper error code if command is not supported */ + fp = popen(cmd, "r"); + if (!fp) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return 0; + } + + ret = fread(enabled_features, 1, sizeof(enabled_features) - 1, fp); + enabled_features[ret] = '\0'; + pclose(fp); + + if (strstr(enabled_features, feature)) + return 1; + return 0; +} + /* Write the server config files */ int ldiskfs_write_ldd(struct mkfs_opts *mop) { @@ -108,6 +254,14 @@ int ldiskfs_write_ldd(struct mkfs_opts *mop) return errno; } + /* + * Append file context to mount options if SE Linux is enabled + */ + #ifdef HAVE_SELINUX + if (is_selinux_enabled() > 0) + append_context_for_mount(mntpt, mop); + #endif + dev = mop->mo_device; if (mop->mo_flags & MO_IS_LOOP) dev = mop->mo_loopdev; @@ -238,7 +392,7 @@ int ldiskfs_read_ldd(char *dev, struct lustre_disk_data *mo_ldd) /* Display the need for the latest e2fsprogs to be installed. make_backfs * indicates if the caller is make_lustre_backfs() or not. */ -void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) +static void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) { static int msg_displayed; @@ -255,7 +409,7 @@ void disp_old_e2fsprogs_msg(const char *feature, int make_backfs) E2FSPROGS, feature); #if !(HAVE_LDISKFSPROGS) fprintf(stderr, "Please install the latest version of e2fsprogs from\n" - "http://downloads.whamcloud.com/public/e2fsprogs/latest/\n" + "https://downloads.hpdd.intel.com/public/e2fsprogs/latest/\n" "to enable this feature.\n"); #endif if (make_backfs) @@ -348,7 +502,7 @@ static int is_e2fsprogs_feature_supp(const char *feature) } ret = fread(supp_features, 1, sizeof(supp_features) - 1, fp); supp_features[ret] = '\0'; - fclose(fp); + pclose(fp); } if (ret > 0 && strstr(supp_features, strncmp(feature, "-O ", 3) ? feature : feature+3)) @@ -454,7 +608,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, append_unique(anchor, ",", "huge_file", NULL, maxbuflen); /* Enable large block addresses if the LUN is over 2^32 blocks. */ - if (mop->mo_device_sz / (L_BLOCK_SIZE >> 10) >= 0x100002000ULL && + if (mop->mo_device_kb / (L_BLOCK_SIZE >> 10) >= 0x100002000ULL && is_e2fsprogs_feature_supp("-O 64bit") == 0) append_unique(anchor, ",", "64bit", NULL, maxbuflen); @@ -485,6 +639,7 @@ static int enable_default_ext4_features(struct mkfs_opts *mop, char *anchor, */ static char *moveopts_to_end(char *start) { + size_t len; char save[512]; char *end, *idx; @@ -497,9 +652,13 @@ static char *moveopts_to_end(char *start) while (*end != ' ' && *end != '\0') ++end; + len = end - start; + if (len >= sizeof(save)) + len = sizeof(save) - 1; + /* save options */ - strncpy(save, start, end - start); - save[end - start] = '\0'; + strncpy(save, start, len); + save[len] = '\0'; /* move remaining options up front */ if (*end) @@ -518,7 +677,7 @@ static char *moveopts_to_end(char *start) /* Build fs according to type */ int ldiskfs_make_lustre(struct mkfs_opts *mop) { - __u64 device_sz = mop->mo_device_sz, block_count = 0; + __u64 device_kb = mop->mo_device_kb, block_count = 0; char mkfs_cmd[PATH_MAX]; char buf[64]; char *start; @@ -527,26 +686,26 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) size_t maxbuflen; if (!(mop->mo_flags & MO_IS_LOOP)) { - mop->mo_device_sz = get_device_size(mop->mo_device); + mop->mo_device_kb = get_device_size(mop->mo_device); - if (mop->mo_device_sz == 0) + if (mop->mo_device_kb == 0) return ENODEV; /* Compare to real size */ - if (device_sz == 0 || device_sz > mop->mo_device_sz) - device_sz = mop->mo_device_sz; + if (device_kb == 0 || device_kb > mop->mo_device_kb) + device_kb = mop->mo_device_kb; else - mop->mo_device_sz = device_sz; + mop->mo_device_kb = device_kb; } - if (mop->mo_device_sz != 0) { - if (mop->mo_device_sz < 8096){ + if (mop->mo_device_kb != 0) { + if (mop->mo_device_kb < 8096) { fprintf(stderr, "%s: size of filesystem must be larger " "than 8MB, but is set to %lldKB\n", - progname, (long long)mop->mo_device_sz); + progname, (long long)mop->mo_device_kb); return EINVAL; } - block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10); + block_count = mop->mo_device_kb / (L_BLOCK_SIZE >> 10); /* If the LUN size is just over 2^32 blocks, limit the * filesystem size to 2^32-1 blocks to avoid problems with * ldiskfs/mkfs not handling this size. Bug 22906 */ @@ -560,20 +719,27 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) long inode_size = 0; /* Journal size in MB */ - if (strstr(mop->mo_mkfsopts, "-J") == NULL) { + if (strstr(mop->mo_mkfsopts, "-J") == NULL && + device_kb > 1024 * 1024) { /* Choose our own default journal size */ - long journal_sz = 0, max_sz; - if (device_sz > 1024 * 1024) /* 1GB */ - journal_sz = (device_sz / 102400) * 4; - /* cap journal size at 1GB */ - if (journal_sz > 1024L) - journal_sz = 1024L; - /* man mkfs.ext3 */ - max_sz = (102400 * L_BLOCK_SIZE) >> 20; /* 400MB */ - if (journal_sz > max_sz) - journal_sz = max_sz; - if (journal_sz) { - sprintf(buf, " -J size=%ld", journal_sz); + long journal_mb = 0, max_mb; + + /* cap journal size at 4GB for MDT, + * leave it at 400MB for OSTs. */ + if (IS_MDT(&mop->mo_ldd)) + max_mb = 4096; + else if (IS_OST(&mop->mo_ldd)) + max_mb = 400; + else /* Use mke2fs default size for MGS */ + max_mb = 0; + + /* Use at most 4% of device for journal */ + journal_mb = device_kb * 4 / (1024 * 100); + if (journal_mb > max_mb) + journal_mb = max_mb; + + if (journal_mb) { + sprintf(buf, " -J size=%ld", journal_mb); strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); } @@ -631,18 +797,18 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) * this, but it is impossible to know in advance. */ if (IS_OST(&mop->mo_ldd)) { /* OST > 16TB assume average file size 1MB */ - if (device_sz > (16ULL << 30)) + if (device_kb > (16ULL << 30)) bytes_per_inode = 1024 * 1024; /* OST > 4TB assume average file size 512kB */ - else if (device_sz > (4ULL << 30)) + else if (device_kb > (4ULL << 30)) bytes_per_inode = 512 * 1024; /* OST > 1TB assume average file size 256kB */ - else if (device_sz > (1ULL << 30)) + else if (device_kb > (1ULL << 30)) bytes_per_inode = 256 * 1024; /* OST > 10GB assume average file size 64kB, * plus a bit so that inodes will fit into a * 256x flex_bg without overflowing */ - else if (device_sz > (10ULL << 20)) + else if (device_kb > (10ULL << 20)) bytes_per_inode = 69905; } @@ -703,8 +869,8 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) * descriptor blocks, but leave one block for the superblock. * Only useful for filesystems with < 2^32 blocks due to resize * limitations. */ - if (IS_OST(&mop->mo_ldd) && mop->mo_device_sz > 100 * 1024 && - mop->mo_device_sz * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) { + if (IS_OST(&mop->mo_ldd) && mop->mo_device_kb > 100 * 1024 && + mop->mo_device_kb * 1024 / L_BLOCK_SIZE <= 0xffffffffULL) { unsigned group_blocks = L_BLOCK_SIZE * 8; unsigned desc_per_block = L_BLOCK_SIZE / 32; unsigned resize_blks; @@ -787,7 +953,7 @@ int ldiskfs_prepare_lustre(struct mkfs_opts *mop, return 0; } -int read_file(char *path, char *buf, int size) +static int read_file(const char *path, char *buf, int size) { FILE *fd; @@ -805,7 +971,7 @@ int read_file(char *path, char *buf, int size) return 0; } -int write_file(char *path, char *buf) +static int write_file(const char *path, const char *buf) { FILE *fd; @@ -818,10 +984,55 @@ int write_file(char *path, char *buf) return 0; } +static int set_blockdev_scheduler(const char *path, const char *scheduler) +{ + char buf[PATH_MAX], *c; + int rc; + + /* Before setting the scheduler, we need to check to see if it's + * already set to "noop". If it is, we don't want to override + * that setting. If it's set to anything other than "noop", set + * the scheduler to what has been passed in. */ + + rc = read_file(path, buf, sizeof(buf)); + if (rc) { + if (verbose) + fprintf(stderr, "%s: cannot open '%s': %s\n", + progname, path, strerror(errno)); + return rc; + } + + /* The expected format of buf: noop anticipatory deadline [cfq] */ + c = strchr(buf, '['); + + /* If c is NULL, the format is not what we expect. Play it safe + * and error out. */ + if (c == NULL) { + if (verbose) + fprintf(stderr, "%s: cannot parse scheduler " + "options for '%s'\n", progname, path); + return -EINVAL; + } + + if (strncmp(c+1, "noop", 4) == 0) + return 0; + + rc = write_file(path, scheduler); + if (rc) { + if (verbose) + fprintf(stderr, "%s: cannot set scheduler on " + "'%s': %s\n", progname, path, + strerror(errno)); + return rc; + } + + return rc; +} + /* This is to tune the kernel for good SCSI performance. * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -int set_blockdev_tunables(char *source, struct mount_opts *mop, int fan_out) +static int set_blockdev_tunables(char *source, struct mount_opts *mop) { glob_t glob_info = { 0 }; struct stat stat_buf; @@ -832,6 +1043,7 @@ int set_blockdev_tunables(char *source, struct mount_opts *mop, int fan_out) char real_path[PATH_MAX] = {'\0'}; int i, rc = 0; int major, minor; + char *slave = NULL; if (!source) return -EINVAL; @@ -900,7 +1112,8 @@ int set_blockdev_tunables(char *source, struct mount_opts *mop, int fan_out) chk_major = strtok_r(buf, ":", &savept); chk_minor = savept; - if (major == atoi(chk_major) &&minor == atoi(chk_minor)) + if (chk_major != NULL && major == atoi(chk_major) && + chk_minor != NULL && minor == atoi(chk_minor)) break; } @@ -970,45 +1183,48 @@ set_params: } } - if (fan_out) { - char *slave = NULL; - glob_info.gl_pathc = 0; - glob_info.gl_offs = 0; - /* if device is multipath device, tune its slave devices */ - snprintf(real_path, sizeof(real_path), "%s/slaves/*", path); - rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info); - - for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++){ - slave = basename(glob_info.gl_pathv[i]); - snprintf(real_path, sizeof(real_path), "/dev/%s", slave); - rc = set_blockdev_tunables(real_path, mop, 0); - } + /* Purposely ignore errors reported from set_blockdev_scheduler. + * The worst that will happen is a block device with an "incorrect" + * scheduler. */ + snprintf(real_path, sizeof(real_path), "%s/%s", path, SCHEDULER_PATH); + set_blockdev_scheduler(real_path, DEFAULT_SCHEDULER); + + /* if device is multipath device, tune its slave devices */ + glob_info.gl_pathc = 0; + glob_info.gl_offs = 0; + snprintf(real_path, sizeof(real_path), "%s/slaves/*", path); + rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info); + + for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++) { + slave = basename(glob_info.gl_pathv[i]); + snprintf(real_path, sizeof(real_path), "/dev/%s", slave); + rc = set_blockdev_tunables(real_path, mop); + } - if (rc == GLOB_NOMATCH) { - /* no slave device is not an error */ - rc = 0; - } else if (rc && verbose) { - if (slave == NULL) { - fprintf(stderr, "warning: %s, failed to read" - " entries under %s/slaves\n", - strerror(errno), path); - } else { - fprintf(stderr, "unable to set tunables for" - " slave device %s (slave would be" - " unable to handle IO request from" - " master %s)\n", - real_path, source); - } + if (rc == GLOB_NOMATCH) { + /* no slave device is not an error */ + rc = 0; + } else if (rc && verbose) { + if (slave == NULL) { + fprintf(stderr, "warning: %s, failed to read" + " entries under %s/slaves\n", + strerror(errno), path); + } else { + fprintf(stderr, "unable to set tunables for" + " slave device %s (slave would be" + " unable to handle IO request from" + " master %s)\n", + real_path, source); } - globfree(&glob_info); } + globfree(&glob_info); return rc; } int ldiskfs_tune_lustre(char *dev, struct mount_opts *mop) { - return set_blockdev_tunables(dev, mop, 1); + return set_blockdev_tunables(dev, mop); } int ldiskfs_label_lustre(struct mount_opts *mop) @@ -1016,117 +1232,14 @@ int ldiskfs_label_lustre(struct mount_opts *mop) char label_cmd[PATH_MAX]; int rc; - snprintf(label_cmd, sizeof(label_cmd), E2LABEL" %s %s", - mop->mo_source, mop->mo_ldd.ldd_svname); + snprintf(label_cmd, sizeof(label_cmd), + TUNE2FS" -f -L '%s' '%s' >/dev/null 2>&1", + mop->mo_ldd.ldd_svname, mop->mo_source); rc = run_command(label_cmd, sizeof(label_cmd)); return rc; } -/* return canonicalized absolute pathname, even if the target file does not - * exist, unlike realpath */ -static char *absolute_path(char *devname) -{ - char buf[PATH_MAX + 1]; - char *path; - char *ptr; - - path = malloc(PATH_MAX + 1); - if (path == NULL) - return NULL; - - if (devname[0] != '/') { - if (getcwd(buf, sizeof(buf) - 1) == NULL) { - free(path); - return NULL; - } - strcat(buf, "/"); - strcat(buf, devname); - } else { - strcpy(buf, devname); - } - /* truncate filename before calling realpath */ - ptr = strrchr(buf, '/'); - if (ptr == NULL) { - free(path); - return NULL; - } - *ptr = '\0'; - if (path != realpath(buf, path)) { - free(path); - return NULL; - } - /* add the filename back */ - strcat(path, "/"); - strcat(path, ptr + 1); - return path; -} - -/* Determine if a device is a block device (as opposed to a file) */ -int is_block(char* devname) -{ - struct stat st; - int ret = 0; - char *devpath; - - devpath = absolute_path(devname); - if (devpath == NULL) { - fprintf(stderr, "%s: failed to resolve path to %s\n", - progname, devname); - return -1; - } - - ret = access(devname, F_OK); - if (ret != 0) { - if (strncmp(devpath, "/dev/", 5) == 0) { - /* nobody sane wants to create a loopback file under - * /dev. Let's just report the device doesn't exist */ - fprintf(stderr, "%s: %s apparently does not exist\n", - progname, devpath); - ret = -1; - goto out; - } - ret = 0; - goto out; - } - ret = stat(devpath, &st); - if (ret != 0) { - fprintf(stderr, "%s: cannot stat %s\n", progname, devpath); - goto out; - } - ret = S_ISBLK(st.st_mode); -out: - free(devpath); - return ret; -} - -static int is_feature_enabled(const char *feature, const char *devpath) -{ - char cmd[PATH_MAX]; - FILE *fp; - char enabled_features[4096] = ""; - int ret = 1; - - snprintf(cmd, sizeof(cmd), "%s -R features %s 2>&1", - DEBUGFS, devpath); - - /* Using popen() instead of run_command() since debugfs does - * not return proper error code if command is not supported */ - fp = popen(cmd, "r"); - if (!fp) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return 0; - } - - ret = fread(enabled_features, 1, sizeof(enabled_features) - 1, fp); - enabled_features[ret] = '\0'; - fclose(fp); - - if (strstr(enabled_features, feature)) - return 1; - return 0; -} - /* Enable quota accounting */ int ldiskfs_enable_quota(struct mkfs_opts *mop) {