Whamcloud - gitweb
LU-14124 target: set OBD_MD_FLGRANT in read's reply 71/45471/2
authorVladimir Saveliev <c17830@cray.com>
Wed, 20 Oct 2021 10:32:11 +0000 (13:32 +0300)
committerOleg Drokin <green@whamcloud.com>
Sun, 14 Nov 2021 05:45:35 +0000 (05:45 +0000)
If tgt_grant_shrink() decides to not shrink grants - a client is
supposed to restore its cl_grant_avail in osc_update_grant(). In case
of read OBD_MD_FLGRANT is not set on reply's body->oa.o_valid, so
osc_update_grant() misses the cl_grant_avail update. As result server
keeps thinking that client has a lot of grants while a client thinks
that it is missing grants badly. That may lead to performance
degradation.

A test to illustrate the issue is included.

Lustre-change: https://review.whamcloud.com/43375
Lustre-commit: 4894683342d77964daeded9fbc608fc46aa479ee

Test-Parameters: testlist=sanity
Change-Id: Ibe7ce0af5701226c8be3ae3f9ad57c354791fa0f
Signed-off-by: Vladimir Saveliev <vlaidimir.saveliev@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/45471
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/target/tgt_handler.c
lustre/tests/sanity.sh

index 2178966..0c36112 100644 (file)
@@ -2316,6 +2316,8 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        } else {
                repbody->oa.o_valid = 0;
        }
+       if (body->oa.o_valid & OBD_MD_FLGRANT)
+               repbody->oa.o_valid |= OBD_MD_FLGRANT;
        /* We're finishing using body->oa as an input variable */
 
        /* Check if client was evicted while we were doing i/o before touching
index 78c84af..79d58bb 100755 (executable)
@@ -7173,6 +7173,60 @@ test_64f() {
 }
 run_test 64f "check grant consumption (with grant allocation)"
 
+test_64h() {
+       local cli=$($LFS getname $DIR); cli=${cli%% *}; cli=${cli#*-}
+       local osc_tgt="$FSNAME-OST0000-osc-$cli"
+       local num_exps=$(do_facet ost1 \
+           $LCTL get_param -n obdfilter.*OST0000*.num_exports)
+       local max_brw_size=$(import_param $osc_tgt max_brw_size)
+       local avail=$($LCTL get_param -n osc.*OST0000-osc-$cli.kbytesavail)
+       local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
+
+       # 10MiB is for file to be written, max_brw_size * 16 *
+       # num_exps is space reserve so that tgt_grant_shrink() decided
+       # to not shrink
+       local expect=$((max_brw_size * 16 * num_exps + 10 * 1048576))
+       (( avail * 1024 < expect )) &&
+               skip "need $expect bytes on ost1, have $(( avail * 1024 )) only"
+
+       save_lustre_params client "osc.*OST0000*.grant_shrink" > $p
+       save_lustre_params client "osc.*OST0000*.grant_shrink_interval" >> $p
+       stack_trap "restore_lustre_params < $p; rm -f $save" EXIT
+       $LCTL set_param osc.*OST0000*.grant_shrink=1
+       $LCTL set_param osc.*OST0000*.grant_shrink_interval=10
+
+       $LFS setstripe -c 1 -i 0 $DIR/$tfile
+       dd if=/dev/zero of=$DIR/$tfile bs=1M count=10 oflag=sync
+
+       # drop cache so that coming read would do rpc
+       cancel_lru_locks osc
+
+       # shrink interval is set to 10, pause for 7 seconds so that
+       # grant thread did not wake up yet but coming read entered
+       # shrink mode for rpc (osc_should_shrink_grant())
+       sleep 7
+
+       declare -a cur_grant_bytes
+       declare -a tot_granted
+       cur_grant_bytes[0]=$($LCTL get_param -n osc.*OST0000*.cur_grant_bytes)
+       tot_granted[0]=$(do_facet ost1 \
+           $LCTL get_param -n obdfilter.*OST0000*.tot_granted)
+
+       dd if=$DIR/$tfile bs=4K count=1 of=/dev/null
+
+       cur_grant_bytes[1]=$($LCTL get_param -n osc.*OST0000*.cur_grant_bytes)
+       tot_granted[1]=$(do_facet ost1 \
+           $LCTL get_param -n obdfilter.*OST0000*.tot_granted)
+
+       # grant change should be equal on both sides
+       (( cur_grant_bytes[0] - cur_grant_bytes[1] ==
+               tot_granted[0] - tot_granted[1])) ||
+               error "grant change mismatch, "                                \
+                       "server: ${tot_granted[0]} to ${tot_granted[1]}, "     \
+                       "client: ${cur_grant_bytes[0]} to ${cur_grant_bytes[1]}"
+}
+run_test 64h "grant shrink on read"
+
 # bug 1414 - set/get directories' stripe info
 test_65a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"