From 1666840bb06bbeeb35b2f9a51f9235c36886a3c6 Mon Sep 17 00:00:00 2001
From: Timothy Day
Date: Wed, 21 Aug 2024 16:31:29 +0000
Subject: [PATCH] LU-18160 lnet: ensure lnetctl ping completes in a finite time

Make the wait_for_completion_timeout() in lnet_ping()
interruptible, so users can halt lnetctl.

Remove the second wait_for_completion() after
LNetMDUnlink(). Per the comments in lib-md.c, active
messages associated with the MD may get aborted upon
unlinking. So just give up on the ping.

Clean up some nearby code to make the logic more
clear.

Signed-off-by: Timothy Day
Change-Id: Ia49c9b1f152f411ebabc5e1c00ff3f03a99f3596
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56110
Reviewed-by: James Simmons
Reviewed-by: Oleg Drokin
Reviewed-by: Serguei Smirnov
Tested-by: Maloo
Tested-by: jenkins
---
 lnet/lnet/api-ni.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c
index 91f993f..2053f1b 100644
--- a/lnet/lnet/api-ni.c
+++ b/lnet/lnet/api-ni.c
@@ -10019,7 +10019,6 @@ static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
 	u32 *st;
 	int nob;
 	int rc;
-	int rc2;
 
 	genradix_init(&plist->lgpl_list);
 
@@ -10057,27 +10056,22 @@ static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
 	init_completion(&pd.completion);
 
 	rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
-	if (rc != 0) {
+	if (rc) {
 		CERROR("Can't bind MD: %d\n", rc);
 		goto fail_ping_buffer_decref;
 	}
 
 	rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
 		     LNET_PROTO_PING_MATCHBITS, 0, false);
-	if (rc != 0) {
-		/* Don't CERROR; this could be deliberate! */
-		rc2 = LNetMDUnlink(pd.mdh);
-		LASSERT(rc2 == 0);
-
-		/* NB must wait for the UNLINK event below... */
-	}
+	if (rc)
+		LASSERT(!LNetMDUnlink(pd.mdh));
 
 	/* Ensure completion in finite time... */
-	wait_for_completion_timeout(&pd.completion, timeout);
-	if (!pd.pd_unlinked) {
+	wait_for_completion_interruptible_timeout(&pd.completion,
+						  timeout);
+
+	if (!pd.pd_unlinked)
 		LNetMDUnlink(pd.mdh);
-		wait_for_completion(&pd.completion);
-	}
 
 	if (!pd.replied) {
 		rc = pd.rc ?: -EIO;
-- 
1.8.3.1