Whamcloud - gitweb
LU-17839 kfilnd: Wait for hello response to mark peer uptodate 70/55070/2
authorChris Horn <chris.horn@hpe.com>
Wed, 15 Nov 2023 19:22:24 +0000 (12:22 -0700)
committerOleg Drokin <green@whamcloud.com>
Wed, 29 May 2024 04:49:41 +0000 (04:49 +0000)
We need to ensure that a target peer has processed a hello request
from the sender before initiating network transactions. This can be
positively affirmed iif we receive a hello response message from
the target.

There are two issues where messages may be dropped because hello
request or response has not been processed.

Issue 1 - Race:
A@kfi -> HELLO REQ -> B@kfi
A@kfi <- HELLO REQ <- B@kfi
A@kfi processes HELLO REQ, marks B@kfi uptodate
A@kfi -> MSG -> B@kfi
A@kfi -> HELLO RSP -> B@kfi

MSG is dropped by B@kfi because it did not process A@kfi's HELLO REQ
or RSP.

Issue 2 - HELLO target already considers originator as uptodate
A@kfi -> HELLO REQ -> B@kfi
B@kfi processes HELLO REQ
A@kfi <- MSG <- B@kfi
A@kfi <- HELLO RSP <- B@kfi

MSG is dropped by A@kfi because it did not process B@kfi's HELLO RSP.

We resolve the first race by waiting for the hello responses to
be processed before marking the peer as uptodate. To ensure that
we will always receive a hello response, the target of a hello request
must initiate its own handshake with the originator. When we receive
a hello request from a new peer then instead of setting the peer state
to KP_STATE_UPTODATE we instead set it to KP_STATE_WAIT_RSP. We can
process RX events for peer in this state, but sends to this peer will
be throttled until we receive a hello response from it.

To resolve the second race we need an additional change to allow
TN_EVENT_RX_OK events to be replayed until the hello response is
received and processed. However, this could result in state changes
that invalidate RX_OK events on replay. Thus, this race will remain
open.

Add CFS_KFI_REPLAY_RX_HELLO_REQ fail_loc to delay the processing of
an incoming hello request.

Add CFS_KFI_FAIL_MSG_TYPE_EAGAIN to delay the sending of specified
message types.

HPE-bug-id: LUS-11673
Test-Parameters: trivial
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Iaaa6b4a533dbcf13cd2a8c1365a89ba521d70af0
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55070
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Ian Ziemba <ian.ziemba@hpe.com>
Reviewed-by: Ron Gredvig <ron.gredvig@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/klnds/kfilnd/kfilnd.h
lnet/klnds/kfilnd/kfilnd_ep.c
lnet/klnds/kfilnd/kfilnd_peer.c
lnet/klnds/kfilnd/kfilnd_tn.c

index 92b1a75..816337a 100644 (file)
 #define CFS_KFI_FAIL_WAIT_SEND_COMP2 0xF116
 #define CFS_KFI_FAIL_WAIT_SEND_COMP3 0xF117
 #define CFS_KFI_REPLAY_IDLE_EVENT 0xF118
+#define CFS_KFI_REPLAY_RX_HELLO_REQ 0xF119
+#define CFS_KFI_FAIL_MSG_TYPE_EAGAIN 0xF11A
 
 /* Maximum number of transaction keys supported. */
 #define KFILND_EP_KEY_BITS 16U
@@ -241,6 +243,12 @@ struct kfilnd_ep {
 #define KP_STATE_STALE 0x3
 /* We suspect this peer is actually down or otherwise unreachable */
 #define KP_STATE_DOWN 0x4
+/* We received a HELLO request from a new peer, and are waiting
+ * for the response to our HELLO request. We can handle RX events for
+ * such a peer, but we will throttle sends to this peer until it is
+ * up-to-date
+ */
+#define KP_STATE_WAIT_RSP 0x5
 
 struct kfilnd_peer {
        struct rhash_head kp_node;
@@ -286,11 +294,12 @@ static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
        return atomic_read(&kp->kp_state) == KP_STATE_NEW;
 }
 
+/* We need to throttle messages if the peer is not up-to-date or stale */
 static inline bool kfilnd_peer_needs_throttle(struct kfilnd_peer *kp)
 {
        unsigned int kp_state = atomic_read(&kp->kp_state);
 
-       return (kp_state == KP_STATE_NEW || kp_state == KP_STATE_DOWN);
+       return !(kp_state == KP_STATE_UPTODATE || kp_state == KP_STATE_STALE);
 }
 
 /* Peer needs hello if it is not up to date and there is not already a hello
index 904295a..18f53fc 100644 (file)
@@ -406,7 +406,9 @@ int kfilnd_ep_post_send(struct kfilnd_ep *ep, struct kfilnd_transaction *tn)
                        return 0;
        } else if (CFS_FAIL_CHECK(CFS_KFI_FAIL_SEND)) {
                return -EIO;
-       } else if (CFS_FAIL_CHECK(CFS_KFI_FAIL_SEND_EAGAIN)) {
+       } else if (CFS_FAIL_CHECK_VALUE(CFS_KFI_FAIL_MSG_TYPE_EAGAIN,
+                                        tn->tn_tx_msg.msg->type) ||
+                  CFS_FAIL_CHECK(CFS_KFI_FAIL_SEND_EAGAIN)) {
                return -EAGAIN;
        }
 
index 2c8ab95..6c66739 100644 (file)
@@ -364,21 +364,17 @@ void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg)
                       libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
                       msg->proto.hello.version, KFILND_MSG_VERSION,
                       kp->kp_version);
-       } else {
+               if (atomic_cmpxchg(&kp->kp_state, KP_STATE_NEW,
+                                  KP_STATE_WAIT_RSP) == KP_STATE_NEW)
+                       CDEBUG(D_NET, "Peer %s(%p):0x%llx new -> wait response\n",
+                              libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+       } else if (msg->type == KFILND_MSG_HELLO_RSP) {
                kp->kp_version = msg->proto.hello.version;
-               CDEBUG(D_NET,"Peer %s(%p):0x%llx negotiated version: %u\n",
+               atomic_set(&kp->kp_state, KP_STATE_UPTODATE);
+               CDEBUG(D_NET,
+                      "Peer %s(%p):0x%llx is up-to-date negotiated version: %u\n",
                       libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
                       msg->proto.hello.version);
-       }
-
-       atomic_set(&kp->kp_state, KP_STATE_UPTODATE);
-       CDEBUG(D_NET, "kp %s(%p):0x%llx is up-to-date\n",
-              libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
-
-       /* Clear kp_hello_state if we've received the hello response,
-        * otherwise this is an incoming hello request and we may have our
-        * own hello request to this peer still outstanding
-        */
-       if (msg->type == KFILND_MSG_HELLO_RSP)
                kfilnd_peer_clear_hello_state(kp);
+       }
 }
index eba1077..2ab71e8 100644 (file)
@@ -846,6 +846,22 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
 
                switch (msg->type) {
                case KFILND_MSG_HELLO_REQ:
+                       if (CFS_FAIL_CHECK(CFS_KFI_REPLAY_RX_HELLO_REQ)) {
+                               CDEBUG(D_NET, "Replay RX HELLO_REQ\n");
+                               return -EAGAIN;
+                       }
+
+                       if (kfilnd_peer_is_new_peer(tn->tn_kp)) {
+                               rc = kfilnd_send_hello_request(tn->tn_ep->end_dev,
+                                                              tn->tn_ep->end_cpt,
+                                                              tn->tn_kp);
+                               if (rc)
+                                       KFILND_TN_ERROR(tn,
+                                                       "Failed to send hello request: rc=%d",
+                                                       rc);
+                               rc = 0;
+                       }
+
                        kfilnd_peer_process_hello(tn->tn_kp, msg);
                        tn->tn_target_addr = kfilnd_peer_get_kfi_addr(tn->tn_kp);
                        KFILND_TN_DEBUG(tn, "Using peer %s(%#llx)",