Whamcloud - gitweb
LU-18160 lnet: ensure lnetctl ping completes in a finite time 10/56110/7
author Timothy Day <timday@amazon.com>
Wed, 21 Aug 2024 16:31:29 +0000 (16:31 +0000)
committer Oleg Drokin <green@whamcloud.com>
Fri, 30 Aug 2024 06:01:16 +0000 (06:01 +0000)
Make the wait_for_completion_timeout() in lnet_ping() interruptible,
so users can halt lnetctl. Remove the second, untimed
wait_for_completion() after LNetMDUnlink(): per the comments in
lib-md.c, active messages associated with the MD may get aborted upon
unlinking, so rather than wait again, just give up on the ping.

Clean up some nearby code to make the logic clearer.

Signed-off-by: Timothy Day <timday@amazon.com>
Change-Id: Ia49c9b1f152f411ebabc5e1c00ff3f03a99f3596
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56110
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
lnet/lnet/api-ni.c

index 91f993f..2053f1b 100644 (file)
@@ -10019,7 +10019,6 @@ static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
        u32 *st;
        int nob;
        int rc;
-       int rc2;
 
        genradix_init(&plist->lgpl_list);
 
@@ -10057,27 +10056,22 @@ static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
        init_completion(&pd.completion);
 
        rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
-       if (rc != 0) {
+       if (rc) {
                CERROR("Can't bind MD: %d\n", rc);
                goto fail_ping_buffer_decref;
        }
 
        rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
                     LNET_PROTO_PING_MATCHBITS, 0, false);
-       if (rc != 0) {
-               /* Don't CERROR; this could be deliberate! */
-               rc2 = LNetMDUnlink(pd.mdh);
-               LASSERT(rc2 == 0);
-
-               /* NB must wait for the UNLINK event below... */
-       }
+       if (rc)
+               LASSERT(!LNetMDUnlink(pd.mdh));
 
        /* Ensure completion in finite time... */
-       wait_for_completion_timeout(&pd.completion, timeout);
-       if (!pd.pd_unlinked) {
+       wait_for_completion_interruptible_timeout(&pd.completion,
+                                                 timeout);
+
+       if (!pd.pd_unlinked)
                LNetMDUnlink(pd.mdh);
-               wait_for_completion(&pd.completion);
-       }
 
        if (!pd.replied) {
                rc = pd.rc ?: -EIO;