Disable automatic max_sectors_kb tuning via mount.lustre by default.
This conflicts in EXA with the tune_devices.sh script, which applies
its own SFA-specific tuning parameters.
Disable l_tunedisk in /etc/udev/rules.d/99-lustre-server.rules by
default for EXA so that it does not conflict with the EXA script.
Allow shrinking max_sectors_kb if explicitly set via mount option.
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I58cf548d08f8680ec5d6ffd00e936a5d903ebbe5
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-on: https://review.whamcloud.com/41839
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
# Ensure block devices re-added to the system allow for large writes (LU-9551)
# Currently applies only to ldiskfs-formatted devices
-ACTION!="add|change", GOTO="l_tunedisk_end"
-SUBSYSTEM!="block", GOTO="l_tunedisk_end"
-ENV{ID_FS_TYPE}!="ext4", GOTO="l_tunedisk_end"
-
-ENV{ID_FS_LABEL}=="MGS|*-MDT*|*-OST*", RUN+="/usr/sbin/l_tunedisk /dev/%k"
-
-LABEL="l_tunedisk_end"
+# Exascaler: l_tunedisk disabled to avoid conflict with EXA script.
+#ACTION!="add|change", GOTO="l_tunedisk_end"
+#SUBSYSTEM!="block", GOTO="l_tunedisk_end"
+#ENV{ID_FS_TYPE}!="ext4", GOTO="l_tunedisk_end"
+#
+#ENV{ID_FS_LABEL}=="MGS|*-MDT*|*-OST*", RUN+="/usr/sbin/l_tunedisk /dev/%k"
+#
+#LABEL="l_tunedisk_end"
.BI abort_recov
Abort client recovery and start the target service immediately.
.TP
-.BI md_stripe_cache_size
-Sets the stripe cache size for server side disk with a striped raid
-configuration.
-.TP
-.BI max_sectors_kb
-Automatically Sets the block device parameter of 'max_sectors_kb' for the
-MDT or OST target. When max_sectors_kb isn't specified, that parameter for
-block device will be set to same as it's own 'max_hw_sectors_kb' (up to a
-maximum of 16M), this is default behavior suited for most users. When
-max_sectors_kb is specified as zero, the old parameter value will be kept.
-When max_sectors_kb is specified as a positive number, the parameter will
-be set to this number arbitrarily.
+.BI md_stripe_cache_size= cache_size
+Sets the MD RAID 'stripe_cache_size' parameter to
+.I cache_size
+KiB for a server OST or MDT block device with a striped RAID configuration.
+.TP
+.BI max_sectors_kb= max_io_size
+Sets the
+.BI /sys/block/ block_device /queue/max_sectors_kb
+parameter for the underlying MDT or OST block device to the maximum allowed
+IO size in KiB. Larger values typically give better throughput, if the
+underlying device can handle these requests, because each request is
+handled more efficiently and far fewer IO requests need to be merged. If
+.IR max_io_size=-1 ,
+the device parameter will be set to the 'max_hw_sectors_kb' of that
+device, up to a maximum of 16384KiB (16MiB); this is suitable for most users
+unless they have already tuned 'max_sectors_kb' externally. When
+.IR max_io_size=0
+(the default), the original value is left unchanged, which should be used
+if the block device is tuned by some external mechanism before mount.
+Otherwise, the device parameter is set to the positive number of KiB
+specified by
+.IR max_io_size .
.TP
.BI recovery_time_soft= timeout
-Allow 'timeout' seconds for clients to reconnect for recovery after a server
+Allow
+.I timeout
+seconds for clients to reconnect for recovery after a server
crash. This timeout will be incrementally extended if it is about to expire
and the server is still handling new connections from recoverable clients.
The default soft recovery timeout is set to 300 seconds (5 minutes).
{
char path[PATH_MAX];
unsigned long max_hw_sectors_kb;
- unsigned long old_max_sectors_kb;
+ unsigned long old_max_sectors_kb = 0;
unsigned long new_max_sectors_kb;
char buf[3 * sizeof(old_max_sectors_kb) + 2];
int rc;
- if (mop->mo_max_sectors_kb >= 0) {
+ if (mop->mo_max_sectors_kb == 0)
+ return 0;
+
+ if (mop->mo_max_sectors_kb > 0) {
new_max_sectors_kb = mop->mo_max_sectors_kb;
goto have_new_max_sectors_kb;
}
if (new_max_sectors_kb > 16 * 1024)
new_max_sectors_kb = 16 * 1024;
-have_new_max_sectors_kb:
snprintf(path, sizeof(path), "%s/%s", sys_path, MAX_SECTORS_KB_PATH);
rc = read_file(path, buf, sizeof(buf));
if (rc != 0) {
return 0;
}
+ /* Don't shrink the current limit if it comes from the default */
if (new_max_sectors_kb <= old_max_sectors_kb)
return 0;
+have_new_max_sectors_kb:
snprintf(buf, sizeof(buf), "%lu", new_max_sectors_kb);
rc = write_file(path, buf);
if (rc != 0) {
return rc;
}
- fprintf(stderr, "%s: increased '%s' from %lu to %lu\n",
- progname, path, old_max_sectors_kb, new_max_sectors_kb);
+ if (old_max_sectors_kb)
+ fprintf(stderr, "%s: increased '%s' from %lu to %lu\n",
+ progname, path, old_max_sectors_kb, new_max_sectors_kb);
+ else
+ fprintf(stderr, "%s: changed '%s' to %lu by request\n",
+ progname, path, new_max_sectors_kb);
return 0;
}
mop->mo_md_stripe_cache_size = 16384;
mop->mo_orig_options = "";
mop->mo_nosvc = 0;
- mop->mo_max_sectors_kb = -1;
+ mop->mo_max_sectors_kb = 0;
}
static int parse_opts(int argc, char *const argv[], struct mount_opts *mop)