From e0adb618a4b0d0182419a5731fe046e9157b9f51 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Mon, 22 Apr 2019 21:18:01 +0300 Subject: [PATCH] LU-10777 dom: disable read-on-open with resend Read-on-open can put more data into the reply buffer than the client allocated, which causes a buffer re-allocation followed by a resend. Meanwhile, FIO read tests show that such resends perform worse than a separate READ RPC. For example, FIO 8k read is ~50% better without the buffer re-allocation and resend. Since the MDC parameter 'mdc_dom_min_repsize' already controls the read-on-open inline buffer size, there is no point in keeping the 'reallocation+resend' option on the MDT. This patch removes it. Signed-off-by: Mikhail Pershin Change-Id: I7eb9d64f5551789e93b1f7676f61c0e7a5149f76 Reviewed-on: https://review.whamcloud.com/34700 Reviewed-by: Patrick Farrell Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_io.c | 22 +++----------------- lustre/tests/sanity.sh | 55 -------------------------------------------------- 2 files changed, 3 insertions(+), 74 deletions(-) diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 3999e26..633c36d 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -1396,7 +1396,6 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, struct niobuf_remote *rnb = NULL; struct niobuf_local *lnb; int rc; - int max_reply_len; loff_t offset; unsigned int len, copied = 0; int lnbs, nr_local, i; @@ -1433,20 +1432,13 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, if (mbo->mbo_dom_size == 0) RETURN(0); - /* check the maximum size available in reply */ - max_reply_len = - req->rq_rqbd->rqbd_svcpt->scp_service->srv_max_reply_size; - - CDEBUG(D_INFO, "File size %llu, reply sizes %d/%d/%d\n", - mbo->mbo_dom_size, max_reply_len, req->rq_reqmsg->lm_repsize, - req->rq_replen); + CDEBUG(D_INFO, "File size %llu, reply sizes %d/%d\n", + mbo->mbo_dom_size, 
req->rq_reqmsg->lm_repsize, req->rq_replen); len = req->rq_reqmsg->lm_repsize - req->rq_replen; - max_reply_len -= req->rq_replen; /* NB: at this moment we have the following sizes: * - req->rq_replen: used data in reply * - req->rq_reqmsg->lm_repsize: total allocated reply buffer at client - * - max_reply_len: maximum reply size allowed by protocol * * Ideal case when file size fits in allocated reply buffer, * that mean we can return whole data in reply. We can also fit more @@ -1458,20 +1450,12 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, * * At the moment the following strategy is used: * 1) try to fit into the buffer we have - * 2) respond with bigger buffer so client will re-allocate it and - * resend (up to srv_max_reply_size value). - * 3) return just file tail otherwise. + * 2) return just file tail otherwise. */ if (mbo->mbo_dom_size <= len) { /* can fit whole data */ len = mbo->mbo_dom_size; offset = 0; - } else if (mbo->mbo_dom_size <= max_reply_len) { - /* It is worth to make this tunable ON/OFF because this will - * cause buffer re-allocation and resend - */ - len = mbo->mbo_dom_size; - offset = 0; } else { int tail, pgbits; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 4f302fe..0c9fc6a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -17554,61 +17554,6 @@ test_271d() { } run_test 271d "DoM: read on open (1K file in reply buffer)" -test_271e() { - [ $MDS1_VERSION -lt $(version_code 2.10.57) ] && - skip "Need MDS version at least 2.10.57" - - local dom=$DIR/$tdir/dom - local tmp=$TMP/${tfile}.data - trap "cleanup_271def_tests $tmp" EXIT - - mkdir -p $DIR/$tdir - - $LFS setstripe -E 1024K -L mdt $DIR/$tdir - - local mdtidx=$($LFS getstripe --mdt-index $DIR/$tdir) - - cancel_lru_locks mdc - dd if=/dev/urandom of=$tmp bs=30K count=1 - dd if=$tmp of=$dom bs=30K count=1 - cancel_lru_locks mdc - cat /etc/hosts >> $tmp - lctl set_param -n mdc.*.stats=clear - - echo "Append to the same 
page" - cat /etc/hosts >> $dom - - local num=$(get_mdc_stats $mdtidx ost_read) - local ra=$(get_mdc_stats $mdtidx req_active) - local rw=$(get_mdc_stats $mdtidx req_waittime) - - [ -z $num ] || error "$num READ RPC occured" - # Reply buffer can be adjusted for larger buffer by resend - echo "... DONE with $((ra - rw)) resends" - - # compare content - cmp $tmp $dom || error "file miscompare" - - cancel_lru_locks mdc - lctl set_param -n mdc.*.stats=clear - - echo "Open and read file" - cat $dom > /dev/null - local num=$(get_mdc_stats $mdtidx ost_read) - local ra=$(get_mdc_stats $mdtidx req_active) - local rw=$(get_mdc_stats $mdtidx req_waittime) - - [ -z $num ] || error "$num READ RPC occured" - # Reply buffer can be adjusted for larger buffer by resend - echo "... DONE with $((ra - rw)) resends" - - # compare content - cmp $tmp $dom || error "file miscompare" - - return 0 -} -run_test 271e "DoM: read on open (30K file with reply buffer adjusting)" - test_271f() { [ $MDS1_VERSION -lt $(version_code 2.10.57) ] && skip "Need MDS version at least 2.10.57" -- 1.8.3.1