Whamcloud - gitweb
LU-13509 ptlrpc: Clear bd_registered in ptlrpc_unregister_bulk 57/38457/4
author: Chris Horn <hornc@cray.com>
Sat, 2 May 2020 15:37:15 +0000 (10:37 -0500)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 2 Jun 2020 04:42:30 +0000 (04:42 +0000)
The patch for LU-12816 https://review.whamcloud.com/36309 has us
clearing the bd_registered flag in ptl_send_rpc(). This flag is set
in ptlrpc_register_bulk(), so it makes sense for us to clear it in
ptlrpc_unregister_bulk(). When we're cleaning up in ptl_send_rpc()
we can be sure the flag is cleared with the call to
ptlrpc_unregister_bulk().

This commit also adds a test case for the LU-12816 bug.

Fixes: e6225c07ce4c ("LU-12816 ptlrpc: ptlrpc_register_bulk LBUG on ENOMEM")
Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: Iabaf109aaf72894cd5acbcacbb0299929ea1a146
Reviewed-on: https://review.whamcloud.com/38457
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/ptlrpc/niobuf.c
lustre/tests/sanity.sh

index 57ec62f..d4b3585 100644 (file)
@@ -448,6 +448,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3  0x520
 #define OBD_FAIL_PTLRPC_BULK_ATTACH      0x521
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3  0x520
 #define OBD_FAIL_PTLRPC_BULK_ATTACH      0x521
+#define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH      0x522
 #define OBD_FAIL_PTLRPC_RESEND_RACE     0x525
 #define OBD_FAIL_PTLRPC_ROUND_XID       0x530
 #define OBD_FAIL_PTLRPC_CONNECT_RACE    0x531
 #define OBD_FAIL_PTLRPC_RESEND_RACE     0x525
 #define OBD_FAIL_PTLRPC_ROUND_XID       0x530
 #define OBD_FAIL_PTLRPC_CONNECT_RACE    0x531
index 43b9db0..bca143e 100644 (file)
@@ -446,6 +446,9 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 
        LASSERT(!in_interrupt());     /* might sleep */
 
 
        LASSERT(!in_interrupt());     /* might sleep */
 
+       if (desc)
+               desc->bd_registered = 0;
+
        /* Let's setup deadline for reply unlink. */
        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
            async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
        /* Let's setup deadline for reply unlink. */
        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
            async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
@@ -827,9 +830,16 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                        request->rq_repmsg = NULL;
                }
 
                        request->rq_repmsg = NULL;
                }
 
-               reply_me = LNetMEAttach(request->rq_reply_portal,
-                                       connection->c_peer, request->rq_xid, 0,
-                                       LNET_UNLINK, LNET_INS_AFTER);
+               if (request->rq_bulk &&
+                   OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH)) {
+                       reply_me = ERR_PTR(-ENOMEM);
+               } else {
+                       reply_me = LNetMEAttach(request->rq_reply_portal,
+                                               connection->c_peer,
+                                               request->rq_xid, 0,
+                                               LNET_UNLINK, LNET_INS_AFTER);
+               }
+
                if (IS_ERR(reply_me)) {
                        rc = PTR_ERR(reply_me);
                        CERROR("LNetMEAttach failed: %d\n", rc);
                if (IS_ERR(reply_me)) {
                        rc = PTR_ERR(reply_me);
                        CERROR("LNetMEAttach failed: %d\n", rc);
@@ -931,9 +941,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
  cleanup_bulk:
        /* We do sync unlink here as there was no real transfer here so
         * the chance to have long unlink to sluggish net is smaller here. */
  cleanup_bulk:
        /* We do sync unlink here as there was no real transfer here so
         * the chance to have long unlink to sluggish net is smaller here. */
-        ptlrpc_unregister_bulk(request, 0);
-       if (request->rq_bulk != NULL)
-               request->rq_bulk->bd_registered = 0;
+       ptlrpc_unregister_bulk(request, 0);
  out:
        if (rc == -ENOMEM) {
                /* set rq_sent so that this request is treated
  out:
        if (rc == -ENOMEM) {
                /* set rq_sent so that this request is treated
index e07a051..1fde0e9 100755 (executable)
@@ -22788,6 +22788,14 @@ test_423() {
 }
 run_test 423 "statfs should return a right data"
 
 }
 run_test 423 "statfs should return a right data"
 
+test_424() {
+#define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH      0x522 | OBD_FAIL_ONCE
+       $LCTL set_param fail_loc=0x80000522
+       dd if=/dev/zero of=$DIR/$tfile bs=2M count=1 oflag=sync
+       rm -f $DIR/$tfile
+}
+run_test 424 "simulate ENOMEM in ptl_send_rpc bulk reply ME attach"
+
 prep_801() {
        [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] ||
        [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] &&
 prep_801() {
        [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] ||
        [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] &&