LU-9704 grant: ignore grant info on read resend
author    Vladimir Saveliev <vlaidimir.saveliev@hpe.com>
          Wed, 3 Nov 2021 10:52:14 +0000 (13:52 +0300)
committer Oleg Drokin <green@whamcloud.com>
          Wed, 17 Nov 2021 18:44:56 +0000 (18:44 +0000)
The following scenario causes a message like "claims 28672 GRANT, real
grant 0" to appear:

 1. the client owns X grant and sends an RPC to shrink part of it
 2. the server fails over, so the shrink RPC has to be resent
 3. on reconnect the client and the server sync on the initial amount
 of grant for the client
 4. the shrink RPC is resent; if server disk space is sufficient, the
 shrink does not happen and the client adds the amount it was going
 to shrink on top of its fresh initial grant. The client now thinks
 it owns more grant than it does from the server's point of view.
 5. the client consumes grant and sends RPCs to the server. The server
 avoids allocating new grant for the client if the current amount is
 big enough:
static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
...
        if (curgrant >= want || curgrant >= ted->ted_grant + chunk)
                RETURN(0);
 6. the client keeps consuming grant, which eventually leads to
 complaints like "claims 28672 GRANT, real grant 0" (see the sketch
 below).
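
A minimal sketch of the accounting above, with made-up numbers and
invented variable names (illustration only, not Lustre code):

#include <stdio.h>

int main(void)
{
        long initial = 8192;            /* grant assigned on (re)connect */
        long client_grant = 28672;      /* step 1: the client's view */
        long server_grant = 28672;      /* the server's view matches */
        long shrink = 16384;            /* amount the shrink RPC gives back */

        client_grant -= shrink;         /* the client deducts when sending */

        /* steps 2-3: failover and reconnect reset both views */
        client_grant = initial;
        server_grant = initial;

        /* step 4: the resent shrink is refused and the client adds
         * the amount back on top of the fresh initial grant */
        client_grant += shrink;

        /* the client now claims 24576 while the server granted 8192 */
        printf("client: %ld, server: %ld\n", client_grant, server_grant);
        return 0;
}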

In case of resent read and set_info:shrink RPCs, the grant info should
be ignored, as it was already reset on reconnect.
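
Both handlers below detect a resent or replayed request via the
MSG_RESENT and MSG_REPLAY flags and drop OBD_MD_FLGRANT from the
request body, so the stale grant count in the RPC is ignored.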

A test illustrating the issue is added.

Lustre-change: https://review.whamcloud.com/45371
Lustre-commit: TBD

Change-Id: I8af1db287dc61c713e5439f4cf6bd652ce02c12c
Signed-off-by: Vladimir Saveliev <vlaidimir.saveliev@hpe.com>
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/45474
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/ofd/ofd_dev.c
lustre/target/tgt_handler.c
lustre/tests/sanity.sh

index 21a9f25..4c485e8 100644
@@ -896,6 +896,19 @@ static int ofd_set_info_hdl(struct tgt_session_info *tsi)
        if (is_grant_shrink) {
                body = req_capsule_client_get(tsi->tsi_pill, &RMF_OST_BODY);
 
+               /*
+                * Grant info was already synced with the client on
+                * reconnect, so clear it for a resent or replayed
+                * request; otherwise the outdated grant count in the
+                * RPC would de-sync the grant counters.
+                */
+               if (lustre_msg_get_flags(req->rq_reqmsg) &
+                   (MSG_RESENT | MSG_REPLAY)) {
+                       DEBUG_REQ(D_CACHE, req,
+                                 "clear resent/replay req grant info");
+                       body->oa.o_valid &= ~OBD_MD_FLGRANT;
+               }
+
                repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
                *repbody = *body;
 
index 0c36112..2ec6d01 100644
@@ -2232,6 +2232,17 @@ int tgt_brw_read(struct tgt_session_info *tsi)
                GOTO(out_lock, rc = -ETIMEDOUT);
        }
 
+       /*
+        * Grant info was already synced with the client on reconnect,
+        * so clear it for a resent or replayed request; otherwise the
+        * outdated grant count in the RPC would de-sync the grant
+        * counters in case of a shrink.
+        */
+       if (lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY)) {
+               DEBUG_REQ(D_CACHE, req, "clear resent/replay req grant info");
+               body->oa.o_valid &= ~OBD_MD_FLGRANT;
+       }
+
        repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
        repbody->oa = body->oa;
 
index c9c01fb..45e9380 100755
@@ -7239,6 +7239,42 @@ test_64h() {
 }
 run_test 64h "grant shrink on read"
 
+test_64i() {
+       local cli
+       local osc_tgt
+       local cgb
+       local testid=${TESTNAME/_/ }
+
+       (( $OST1_VERSION >= $(version_code 2.12.7) )) ||
+               skip "need OST at least 2.12.7 to avoid grant shrink on replay"
+
+       [ $PARALLEL == "yes" ] && skip "skip parallel run"
+       remote_ost_nodsh && skip "remote OSTs with nodsh"
+
+       $LFS setstripe -c 1 -i 0 $DIR/$tfile
+
+       dd if=/dev/zero of=$DIR/$tfile bs=1M count=64
+
+       # lustre-ffff9fc75e850800 /mnt/lustre -> ffff9fc75e850800
+       cli=$($LFS getname $DIR); cli=${cli%% *}; cli=${cli##*-}
+       osc_tgt="$FSNAME-OST0000-osc-$cli"
+       cgb=$($LCTL get_param -n osc.$osc_tgt.cur_grant_bytes)
+
+       # shrink grants and simulate rpc loss
+       #define OBD_FAIL_PTLRPC_DROP_REQ_OPC     0x513
+       do_facet ost1 "$LCTL set_param fail_loc=0x80000513 fail_val=17"
+       $LCTL set_param osc.$osc_tgt.cur_grant_bytes=$((cgb/2))
+
+       fail ost1
+
+       dd if=/dev/zero of=$DIR/$tfile oflag=append bs=1M count=8 conv=notrunc
+
+       do_facet ost1 dmesg | tac | sed "/$testid/,$ d" |
+               grep "GRANT, real grant" &&
+               error "client has more grants than it owns" || true
+}
+run_test 64i "shrink on reconnect"
+
 # bug 1414 - set/get directories' stripe info
 test_65a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
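
Note on the test: fail_loc 0x513 is OBD_FAIL_PTLRPC_DROP_REQ_OPC and
fail_val=17 selects opcode 17 (OST_SET_INFO, the RPC that carries the
grant shrink), so ost1 drops the shrink request; the 0x80000000 bit
(CFS_FAIL_ONCE) makes the drop fire only once. "fail ost1" then forces
a failover, after which the client reconnects and resends the dropped
shrink, reproducing the scenario described above.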