From: Prakash Surya Date: Tue, 18 Dec 2012 17:34:46 +0000 (-0800) Subject: LU-2498 mount: Set 'deadline' scheduler on ldiskfs X-Git-Tag: 2.4.51~57 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=57c78c901b772d196e82f018209a393e092be2cf LU-2498 mount: Set 'deadline' scheduler on ldiskfs The recommended IO scheduler to use with Lustre on top of ldiskfs is either the 'deadline' scheduler, or the 'noop' scheduler. This patch attempts to automatically configure a device to use the 'deadline' scheduler for the user at mount time. This makes it unnecessary to use any out-of-band mechanism to ensure this value is set properly (e.g. startup scripts, kernel configuration, etc.). Also, if the scheduler is already set to 'noop', this patch will leave that setting in place. This makes it easy for a user to manually set the scheduler to 'noop', if they would prefer to use that scheduler instead of 'deadline'. Signed-off-by: Prakash Surya Change-Id: If869468ba5370afddb29233dd893d74b9c45f367 Reviewed-on: http://review.whamcloud.com/4853 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/utils/mount_utils_ldiskfs.c b/lustre/utils/mount_utils_ldiskfs.c index 45c491e..bf8e2c8 100644 --- a/lustre/utils/mount_utils_ldiskfs.c +++ b/lustre/utils/mount_utils_ldiskfs.c @@ -82,8 +82,11 @@ #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" +#define SCHEDULER_PATH "queue/scheduler" #define STRIPE_CACHE_SIZE "md/stripe_cache_size" +#define DEFAULT_SCHEDULER "deadline" + extern char *progname; #define L_BLOCK_SIZE 4096 @@ -787,7 +790,7 @@ int ldiskfs_prepare_lustre(struct mkfs_opts *mop, return 0; } -int read_file(char *path, char *buf, int size) +int read_file(const char *path, char *buf, int size) { FILE *fd; @@ -805,7 +808,7 @@ int read_file(char *path, char *buf, int size) return 0; } -int write_file(char *path, char *buf) +int 
/*
 * NOTE: the rest of this patch is collapsed onto the single line below; the
 * text is left untouched so the diff content stays exactly as published.
 *
 * After the hunk that const-qualifies the arguments of write_file(), the
 * patch adds set_blockdev_scheduler(path, scheduler), which:
 *   - reads the file at path with read_file() into a PATH_MAX-sized buffer;
 *     a non-zero result is returned unchanged, and a message is printed to
 *     stderr only when verbose is set;
 *   - searches what was read for an opening square bracket and returns
 *     -EINVAL when none is found;
 *   - returns 0 without writing anything when the text right after the
 *     bracket starts with "noop", so a scheduler already set to noop is
 *     kept (per the in-code comment, the bracketed entry is the current
 *     setting);
 *   - otherwise writes scheduler to path with write_file() and returns the
 *     result of that call, printing a message on failure only when verbose
 *     is set.
 *
 * The last hunk formats path followed by SCHEDULER_PATH into real_path and
 * calls set_blockdev_scheduler() with DEFAULT_SCHEDULER; the return value is
 * deliberately ignored there.
 */
write_file(const char *path, const char *buf) { FILE *fd; @@ -818,6 +821,51 @@ int write_file(char *path, char *buf) return 0; } +int set_blockdev_scheduler(const char *path, const char *scheduler) +{ + char buf[PATH_MAX], *c; + int rc; + + /* Before setting the scheduler, we need to check to see if it's + * already set to "noop". If it is, we don't want to override + * that setting. If it's set to anything other than "noop", set + * the scheduler to what has been passed in. */ + + rc = read_file(path, buf, sizeof(buf)); + if (rc) { + if (verbose) + fprintf(stderr, "%s: cannot open '%s': %s\n", + progname, path, strerror(errno)); + return rc; + } + + /* The expected format of buf: noop anticipatory deadline [cfq] */ + c = strchr(buf, '['); + + /* If c is NULL, the format is not what we expect. Play it safe + * and error out. */ + if (c == NULL) { + if (verbose) + fprintf(stderr, "%s: cannot parse scheduler " + "options for '%s'\n", progname, path); + return -EINVAL; + } + + if (strncmp(c+1, "noop", 4) == 0) + return 0; + + rc = write_file(path, scheduler); + if (rc) { + if (verbose) + fprintf(stderr, "%s: cannot set scheduler on " + "'%s': %s\n", progname, path, + strerror(errno)); + return rc; + } + + return rc; +} + /* This is to tune the kernel for good SCSI performance. * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ @@ -970,6 +1018,12 @@ set_params: } } + /* Purposely ignore errors reported from set_blockdev_scheduler. + * The worst that will happen is a block device with an "incorrect" + * scheduler. */ + snprintf(real_path, sizeof(real_path), "%s/%s", path, SCHEDULER_PATH); + set_blockdev_scheduler(real_path, DEFAULT_SCHEDULER); + if (fan_out) { char *slave = NULL; glob_info.gl_pathc = 0;