From 4a81be263079d0eaf6c9b2aaf151ab06e442760e Mon Sep 17 00:00:00 2001
From: Andriy Skulysh
Date: Tue, 19 Dec 2017 11:20:21 +0200
Subject: [PATCH] LU-10643 ptlrpc: ptlrpc_register_bulk() LBUG on ENOMEM

The !desc->bd_registered assertion in ptlrpc_register_bulk() fails
when the request is retried after ENOMEM. Clear the bd_registered flag
on the ENOMEM path and exit ptl_send_rpc() via cleanup_bulk to ensure
that the bulk is fully unregistered.

Cray-bug-id: MRP-4733
Change-Id: I51be5ec041ef903040bf8508156da8079511c9f7
Signed-off-by: Andriy Skulysh
Reviewed-on: https://review.whamcloud.com/31228
Reviewed-by: Alexandr Boyko
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andrew Perepechko
Reviewed-by: Oleg Drokin
---
 lustre/include/obd_support.h | 1 +
 lustre/ptlrpc/niobuf.c | 10 ++++++++--
 lustre/tests/sanity.sh | 8 ++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 1e3fd9b..3844961 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -427,6 +427,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520
+#define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521
 
 #define OBD_FAIL_OBD_PING_NET 0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index 330ce4a..c5867f0 100644
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -375,8 +375,13 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
 			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
 		ptlrpc_fill_bulk_md(&md, desc, posted_md);
 
-		rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
+		if (posted_md > 0 && posted_md + 1 == total_md &&
+		    OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_ATTACH)) {
+			rc = -ENOMEM;
+		} else {
+			rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
 				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
+		}
 		if (rc != 0) {
 			CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
 			       desc->bd_import->imp_obd->obd_name, mbits,
@@ -405,6 +410,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
 		LASSERT(desc->bd_md_count >= 0);
 		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
 		req->rq_status = -ENOMEM;
+		desc->bd_registered = 0;
 		RETURN(-ENOMEM);
 	}
 
@@ -791,7 +797,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 	if (request->rq_bulk != NULL) {
 		rc = ptlrpc_register_bulk (request);
 		if (rc != 0)
-			GOTO(out, rc);
+			GOTO(cleanup_bulk, rc);
 		/*
 		 * All the mds in the request will have the same cpt
 		 * encoded in the cookie. So we can just get the first
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index 8c84927..76a8e08 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -18088,6 +18088,14 @@ test_413() {
 }
 run_test 413 "mkdir on less full MDTs"
 
+test_414() {
+#define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521
+	$LCTL set_param fail_loc=0x80000521
+	dd if=/dev/zero of=$DIR/$tfile bs=2M count=1 oflag=sync
+	rm -f $DIR/$tfile
+}
+run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()"
+
 prep_801() {
 	[[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
 	[[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] &&
-- 
1.8.3.1