Whamcloud - gitweb
LU-10643 ptlrpc: ptlrpc_register_bulk() LBUG on ENOMEM 28/31228/8
author Andriy Skulysh <c17819@cray.com>
Tue, 19 Dec 2017 09:20:21 +0000 (11:20 +0200)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 9 Apr 2018 19:48:55 +0000 (19:48 +0000)
An assertion on !desc->bd_registered fails in ptlrpc_register_bulk()
when the request is retried after an ENOMEM failure.

Clear the bd_registered flag on the ENOMEM path of ptlrpc_register_bulk()
and make ptl_send_rpc() exit via cleanup_bulk to ensure that the bulk is
fully unregistered.

Cray-bug-id: MRP-4733
Change-Id: I51be5ec041ef903040bf8508156da8079511c9f7
Signed-off-by: Andriy Skulysh <c17819@cray.com>
Reviewed-on: https://review.whamcloud.com/31228
Reviewed-by: Alexandr Boyko <c17825@cray.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/ptlrpc/niobuf.c
lustre/tests/sanity.sh

index 1e3fd9b..3844961 100644 (file)
@@ -427,6 +427,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK  0x51b
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3  0x520
 #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK  0x51b
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3  0x520
+#define OBD_FAIL_PTLRPC_BULK_ATTACH      0x521
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
index 330ce4a..c5867f0 100644 (file)
@@ -375,8 +375,13 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
                              LNET_MD_OP_GET : LNET_MD_OP_PUT);
                ptlrpc_fill_bulk_md(&md, desc, posted_md);
 
                              LNET_MD_OP_GET : LNET_MD_OP_PUT);
                ptlrpc_fill_bulk_md(&md, desc, posted_md);
 
-               rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
+               if (posted_md > 0 && posted_md + 1 == total_md &&
+                   OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_ATTACH)) {
+                       rc = -ENOMEM;
+               } else {
+                       rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &me_h);
                                  LNET_UNLINK, LNET_INS_AFTER, &me_h);
+               }
                if (rc != 0) {
                        CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, mbits,
                if (rc != 0) {
                        CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, mbits,
@@ -405,6 +410,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
                LASSERT(desc->bd_md_count >= 0);
                mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
                req->rq_status = -ENOMEM;
                LASSERT(desc->bd_md_count >= 0);
                mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
                req->rq_status = -ENOMEM;
+               desc->bd_registered = 0;
                RETURN(-ENOMEM);
        }
 
                RETURN(-ENOMEM);
        }
 
@@ -791,7 +797,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk (request);
                if (rc != 0)
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk (request);
                if (rc != 0)
-                       GOTO(out, rc);
+                       GOTO(cleanup_bulk, rc);
                /*
                 * All the mds in the request will have the same cpt
                 * encoded in the cookie. So we can just get the first
                /*
                 * All the mds in the request will have the same cpt
                 * encoded in the cookie. So we can just get the first
index 8c84927..76a8e08 100755 (executable)
@@ -18088,6 +18088,14 @@ test_413() {
 }
 run_test 413 "mkdir on less full MDTs"
 
 }
 run_test 413 "mkdir on less full MDTs"
 
+test_414() {
+#define OBD_FAIL_PTLRPC_BULK_ATTACH      0x521
+       $LCTL set_param fail_loc=0x80000521
+       dd if=/dev/zero of=$DIR/$tfile bs=2M count=1 oflag=sync
+       rm -f $DIR/$tfile
+}
+run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()"
+
 prep_801() {
        [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
        [[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] &&
 prep_801() {
        [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
        [[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] &&