From: Chris Horn
Date: Sat, 2 May 2020 15:37:15 +0000 (-0500)
Subject: LU-13509 ptlrpc: Clear bd_registered in ptlrpc_unregister_bulk
X-Git-Tag: 2.13.55~175
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=15057a17ca1e2a0e796cfada6abeb28aa77679cf

LU-13509 ptlrpc: Clear bd_registered in ptlrpc_unregister_bulk

The patch for LU-12816 https://review.whamcloud.com/36309 has us
clearing the bd_registered flag in ptl_send_rpc(). This flag is set
in ptlrpc_register_bulk(), so it makes sense for us to clear it in
ptlrpc_unregister_bulk(). When we're cleaning up in ptl_send_rpc()
we can be sure the flag is cleared with the call to
ptlrpc_unregister_bulk().

This commit also adds a test case for the LU-12816 bug.

Fixes: e6225c07ce4c ("LU-12816 ptlrpc: ptlrpc_register_bulk LBUG on ENOMEM")
Signed-off-by: Chris Horn
Change-Id: Iabaf109aaf72894cd5acbcacbb0299929ea1a146
Reviewed-on: https://review.whamcloud.com/38457
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Shaun Tancheff
Reviewed-by: Wang Shilong
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 57ec62f..d4b3585 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -448,6 +448,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520
 #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521
+#define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH 0x522
 #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525
 #define OBD_FAIL_PTLRPC_ROUND_XID 0x530
 #define OBD_FAIL_PTLRPC_CONNECT_RACE 0x531
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index 43b9db0..bca143e 100644
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -446,6 +446,9 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 
 	LASSERT(!in_interrupt()); /* might sleep */
 
+	if (desc)
+		desc->bd_registered = 0;
+
 	/* Let's setup deadline for reply unlink. */
 	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
 	    async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
@@ -827,9 +830,16 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 			request->rq_repmsg = NULL;
 		}
 
-		reply_me = LNetMEAttach(request->rq_reply_portal,
-					connection->c_peer, request->rq_xid, 0,
-					LNET_UNLINK, LNET_INS_AFTER);
+		if (request->rq_bulk &&
+		    OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH)) {
+			reply_me = ERR_PTR(-ENOMEM);
+		} else {
+			reply_me = LNetMEAttach(request->rq_reply_portal,
+						connection->c_peer,
+						request->rq_xid, 0,
+						LNET_UNLINK, LNET_INS_AFTER);
+		}
+
 		if (IS_ERR(reply_me)) {
 			rc = PTR_ERR(reply_me);
 			CERROR("LNetMEAttach failed: %d\n", rc);
@@ -931,9 +941,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 cleanup_bulk:
 	/* We do sync unlink here as there was no real transfer here so
 	 * the chance to have long unlink to sluggish net is smaller here. */
-        ptlrpc_unregister_bulk(request, 0);
-	if (request->rq_bulk != NULL)
-		request->rq_bulk->bd_registered = 0;
+	ptlrpc_unregister_bulk(request, 0);
 out:
 	if (rc == -ENOMEM) {
 		/* set rq_sent so that this request is treated
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index e07a051..1fde0e9 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -22788,6 +22788,14 @@ test_423() {
 }
 run_test 423 "statfs should return a right data"
 
+test_424() {
+#define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH 0x522 | OBD_FAIL_ONCE
+	$LCTL set_param fail_loc=0x80000522
+	dd if=/dev/zero of=$DIR/$tfile bs=2M count=1 oflag=sync
+	rm -f $DIR/$tfile
+}
+run_test 424 "simulate ENOMEM in ptl_send_rpc bulk reply ME attach"
+
 prep_801() {
 	[[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] ||
 	[[ $OST1_VERSION -lt $(version_code 2.9.55) ]] &&