From 82e494a36e9ea4f51ec163ab15beb9fdda7fa8d6 Mon Sep 17 00:00:00 2001
From: Vladimir Saveliev
Date: Fri, 15 Dec 2017 12:33:17 +0300
Subject: [PATCH] LU-8895 target: limit grant allocation

tgt_grant_alloc() is missing a check for the amount of space already
granted to a client. If the client submits a number of RPCs
simultaneously while its grant is below its maximum amount of grants,
the server may give the client an amount of grants substantially
exceeding the amount of grants requested in one RPC. With a sizeable
number of clients this may lead to ENOSPC long before the disk is
actually out of space.

Limit the grants given to a client to the requested amount plus grants
for 2 full write RPCs.

A test to illustrate the issue is included. The test needs to lower the
debug level so that dd provides sufficient I/O throughput.

Signed-off-by: Vladimir Saveliev
Seagate-bug-id: MRP-4013
Change-Id: Ie6a8abbad28a06bc1d55ff2fd042b9664a29e9e4
Reviewed-on: https://review.whamcloud.com/24096
Reviewed-by: Andreas Dilger
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Mike Pershin
Reviewed-by: Oleg Drokin
---
 lustre/target/tgt_grant.c |  8 +++++
 lustre/tests/sanity.sh    | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c
index 4fa0623..355a092 100644
--- a/lustre/target/tgt_grant.c
+++ b/lustre/target/tgt_grant.c
@@ -931,6 +931,14 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
 	if ((grant > chunk) && conservative)
 		grant = chunk;
 
+	/*
+	 * Limit grant so that export' grant does not exceed what the
+	 * client would like to have by more than grants for 2 full
+	 * RPCs
+	 */
+	if (ted->ted_grant + grant > want + chunk)
+		grant = want + chunk - ted->ted_grant;
+
 	tgd->tgd_tot_granted += grant;
 	ted->ted_grant += grant;
 
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index 7bb1a57..8cdf3fe 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ 
-5791,6 +5791,112 @@ test_64c() {
 }
 run_test 64c "verify grant shrink"
 
+# Print the maximum amount of grant, in bytes, that the client asks the
+# server for on OSC device $1.  This does exactly what
+# osc_request.c:osc_announce_cached() does in order to calculate it.
+want_grant() {
+	local tgt=$1
+	local page_size=$(get_page_size client)
+	local nrpages=$($LCTL get_param -n osc.${tgt}.max_pages_per_rpc)
+	local rpc_in_flight=$($LCTL get_param -n osc.${tgt}.max_rpcs_in_flight)
+	local dirty_max_pages=$($LCTL get_param -n osc.${tgt}.max_dirty_mb)
+	local extent_pages
+	local grant_extent_tax
+	local nrextents
+	local undirty
+
+	# pages for max_rpcs_in_flight + 1 full RPCs ...
+	nrpages=$((nrpages * (rpc_in_flight + 1)))
+
+	# ... or the dirty cache limit in pages, whichever is larger
+	dirty_max_pages=$((dirty_max_pages * 1024 * 1024 / page_size))
+	[[ $dirty_max_pages -gt $nrpages ]] && nrpages=$dirty_max_pages
+	undirty=$((nrpages * page_size))
+
+	extent_pages=$($LCTL get_param osc.${tgt}.import |
+		awk '/grant_max_extent_size/ { print $2 }')
+	extent_pages=$((extent_pages / page_size))
+	grant_extent_tax=$($LCTL get_param osc.${tgt}.import |
+		awk '/grant_extent_tax/ { print $2 }')
+
+	# add one extent tax per maximum-sized extent; skipped when the import
+	# reports no usable grant_max_extent_size, which would divide by zero
+	if [[ $extent_pages -gt 0 ]]; then
+		nrextents=$(((nrpages + extent_pages - 1) / extent_pages))
+		undirty=$((undirty + nrextents * grant_extent_tax))
+	fi
+
+	echo $undirty
+}
+
+# Print the size, in bytes, of the unit used for grant allocation on OSC
+# device $1.  It should be equal to what tgt_grant.c:tgt_grant_chunk()
+# calculates.
+grant_chunk() {
+	local tgt=$1
+	local max_brw_size
+	local grant_extent_tax
+
+	max_brw_size=$($LCTL get_param osc.${tgt}.import |
+		awk '/max_brw_size/ { print $2 }')
+	grant_extent_tax=$($LCTL get_param osc.${tgt}.import |
+		awk '/grant_extent_tax/ { print $2 }')
+
+	echo $(((max_brw_size + grant_extent_tax) * 2))
+}
+
+# Check that, while a large write is in progress, the grant held by the
+# client never exceeds the amount it asks for (want_grant) by more than
+# one allocation unit (grant_chunk).
+test_64d() {
+	[ $(lustre_version_code ost1) -lt $(version_code 2.10.56) ] &&
+		skip "OST < 2.10.56 doesn't limit grants enough" && return 0
+
+	local tgt=$($LCTL dl | grep "0000-osc-[^mM]" | awk '{print $4}')
+
+	[[ $($LCTL get_param osc.${tgt}.import |
+		grep "connect_flags:.*grant_param") ]] ||
+		{ skip "no grant_param connect flag"; return; }
+
+	local olddebug=$($LCTL get_param -n debug 2> /dev/null)
+	local max_cur_granted
+	local cur_grant
+	local exceeded=""
+	local ddpid
+
+	# Lower the debug level so that dd provides sufficient I/O throughput.
+	# OLDDEBUG (upper case) is set outside this function and is distinct
+	# from the local olddebug, which is what gets restored below.
+	$LCTL set_param debug="$OLDDEBUG" 2> /dev/null || true
+
+	max_cur_granted=$(($(want_grant $tgt) + $(grant_chunk $tgt)))
+
+	$SETSTRIPE $DIR/$tfile -i 0 -c 1
+	dd if=/dev/zero of=$DIR/$tfile bs=1M count=1000 &
+	ddpid=$!
+
+	# sample the grant every 2 seconds for as long as dd is running
+	while true; do
+		cur_grant=$($LCTL get_param -n osc.${tgt}.cur_grant_bytes)
+		if [[ $cur_grant -gt $max_cur_granted ]]; then
+			exceeded=$cur_grant
+			kill $ddpid 2> /dev/null || true
+			break
+		fi
+		kill -0 $ddpid 2> /dev/null || break
+		sleep 2
+	done
+
+	# reap dd, restore the debug mask and remove the file before
+	# reporting, so that a failure does not leak state into later tests
+	wait $ddpid || true
+	$LCTL set_param debug="$olddebug" 2> /dev/null || true
+	rm -f $DIR/$tfile
+
+	[[ -z $exceeded ]] || error "cur_grant $exceeded > $max_cur_granted"
+}
+run_test 64d "check grant limit exceed"
+
 # bug 1414 - set/get directories' stripe info
 test_65a() {
 	[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
-- 
1.8.3.1