During 16MB bulk RPC I/O evaluation on rhel7, due to the kernel
BIO_MAX_PAGES (256) limit, each 16MB I/O is divided into 16 1MB
I/Os submitted to the underlying block device one by one, and we
found that the SFA disk driver received lots of 1MB I/Os.
To optimize the performance, this patch introduces blk_plug into
osd-ldiskfs when doing bio: before submitting I/Os it calls
blk_start_plug, and after submitting all 16MB of I/Os it calls
blk_finish_plug, so that the 16MB bulk I/O has more chance to be
merged in the block elevator scheduler layer.
Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: If26db9f85baf97bc441cc4ad19d5c9f97bd3d7e5
Reviewed-on: https://review.whamcloud.com/26697
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
]) # LC_EXPORT_SIMPLE_SETATTR
#
]) # LC_EXPORT_SIMPLE_SETATTR
#
+# LC_HAVE_BLK_PLUG
+#
+# Kernel 2.6.38 added 'struct blk_plug' and the blk_start_plug()/
+# blk_finish_plug() API (on-stack plugging).  Probe for it so osd-ldiskfs
+# can batch bio submissions; defines HAVE_BLK_PLUG when available.
+#
+AC_DEFUN([LC_HAVE_BLK_PLUG], [
+LB_CHECK_COMPILE([if 'struct blk_plug' exists],
+blk_plug, [
+	#include <linux/blkdev.h>
+],[
+	struct blk_plug plug;
+
+	blk_start_plug(&plug);
+	blk_finish_plug(&plug);
+],[
+	AC_DEFINE(HAVE_BLK_PLUG, 1,
+		[blk_plug struct exists])
+])
+]) # LC_HAVE_BLK_PLUG
+
+#
# LC_IOP_TRUNCATE
#
# truncate callback removed since 2.6.39
# LC_IOP_TRUNCATE
#
# truncate callback removed since 2.6.39
LC_INODE_I_RCU
LC_D_COMPARE_7ARGS
LC_D_DELETE_CONST
LC_INODE_I_RCU
LC_D_COMPARE_7ARGS
LC_D_DELETE_CONST
# 2.6.39
LC_REQUEST_QUEUE_UNPLUG_FN
# 2.6.39
LC_REQUEST_QUEUE_UNPLUG_FN
#define queue_max_hw_segments(rq) queue_max_segments(rq)
#endif
#define queue_max_hw_segments(rq) queue_max_segments(rq)
#endif
+/*
+ * Compatibility wrappers for block-layer plugging (struct blk_plug,
+ * added in kernel 2.6.38).  On older kernels without HAVE_BLK_PLUG,
+ * DECLARE_PLUG expands to nothing and the plug start/finish calls
+ * compile away to no-ops, so callers need no #ifdefs.
+ */
+#ifdef HAVE_BLK_PLUG
+#define DECLARE_PLUG(plug)	struct blk_plug plug
+#else /* !HAVE_BLK_PLUG */
+#define DECLARE_PLUG(name)
+#define blk_start_plug(plug)	do {} while (0)
+#define blk_finish_plug(plug)	do {} while (0)
+#endif
+
#ifdef HAVE_KMAP_ATOMIC_HAS_1ARG
#define ll_kmap_atomic(a, b) kmap_atomic(a)
#define ll_kunmap_atomic(a, b) kunmap_atomic(a)
#ifdef HAVE_KMAP_ATOMIC_HAS_1ARG
#define ll_kmap_atomic(a, b) kmap_atomic(a)
#define ll_kunmap_atomic(a, b) kunmap_atomic(a)
int page_idx;
int i;
int rc = 0;
int page_idx;
int i;
int rc = 0;
ENTRY;
LASSERT(iobuf->dr_npages == npages);
ENTRY;
LASSERT(iobuf->dr_npages == npages);
osd_brw_stats_update(osd, iobuf);
iobuf->dr_start_time = cfs_time_current();
osd_brw_stats_update(osd, iobuf);
iobuf->dr_start_time = cfs_time_current();
for (page_idx = 0, block_idx = 0;
page_idx < npages;
page_idx++, block_idx += blocks_per_page) {
for (page_idx = 0, block_idx = 0;
page_idx < npages;
page_idx++, block_idx += blocks_per_page) {
+ blk_finish_plug(&plug);
+
/* in order to achieve better IO throughput, we don't wait for writes
* completion here. instead we proceed with transaction commit in
* parallel and wait for IO completion once transaction is stopped
/* in order to achieve better IO throughput, we don't wait for writes
* completion here. instead we proceed with transaction commit in
* parallel and wait for IO completion once transaction is stopped