From 3a9dda58080420583f5fdf9e67e0ddb2cc74e040 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Wed, 15 Nov 2023 12:22:24 -0700 Subject: [PATCH] LU-17839 kfilnd: Wait for hello response to mark peer uptodate We need to ensure that a target peer has processed a hello request from the sender before initiating network transactions. This can be positively affirmed iif we receive a hello response message from the target. There are two issues where messages may be dropped because hello request or response has not been processed. Issue 1 - Race: A@kfi -> HELLO REQ -> B@kfi A@kfi <- HELLO REQ <- B@kfi A@kfi processes HELLO REQ, marks B@kfi uptodate A@kfi -> MSG -> B@kfi A@kfi -> HELLO RSP -> B@kfi MSG is dropped by B@kfi because it did not process A@kfi's HELLO REQ or RSP. Issue 2 - HELLO target already considers originator as uptodate A@kfi -> HELLO REQ -> B@kfi B@kfi processes HELLO REQ A@kfi <- MSG <- B@kfi A@kfi <- HELLO RSP <- B@kfi MSG is dropped by A@kfi because it did not process B@kfi's HELLO RSP. We resolve the first race by waiting for the hello responses to be processed before marking the peer as uptodate. To ensure that we will always receive a hello response, the target of a hello request must initiate its own handshake with the originator. When we receive a hello request from a new peer then instead of setting the peer state to KP_STATE_UPTODATE we instead set it to KP_STATE_WAIT_RSP. We can process RX events for peer in this state, but sends to this peer will be throttled until we receive a hello response from it. To resolve the second race we need an additional change to allow TN_EVENT_RX_OK events to be replayed until the hello response is received and processed. However, this could result in state changes that invalidate RX_OK events on replay. Thus, this race will remain open. Add CFS_KFI_REPLAY_RX_HELLO_REQ fail_loc to delay the processing of an incoming hello request. Add CFS_KFI_FAIL_MSG_TYPE_EAGAIN to delay the sending of specified message types. HPE-bug-id: LUS-11673 Test-Parameters: trivial Signed-off-by: Chris Horn Change-Id: Iaaa6b4a533dbcf13cd2a8c1365a89ba521d70af0 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55070 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Ian Ziemba Reviewed-by: Ron Gredvig Reviewed-by: Oleg Drokin --- lnet/klnds/kfilnd/kfilnd.h | 11 ++++++++++- lnet/klnds/kfilnd/kfilnd_ep.c | 4 +++- lnet/klnds/kfilnd/kfilnd_peer.c | 22 +++++++++------------- lnet/klnds/kfilnd/kfilnd_tn.c | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/lnet/klnds/kfilnd/kfilnd.h b/lnet/klnds/kfilnd/kfilnd.h index 92b1a75..816337a 100644 --- a/lnet/klnds/kfilnd/kfilnd.h +++ b/lnet/klnds/kfilnd/kfilnd.h @@ -105,6 +105,8 @@ #define CFS_KFI_FAIL_WAIT_SEND_COMP2 0xF116 #define CFS_KFI_FAIL_WAIT_SEND_COMP3 0xF117 #define CFS_KFI_REPLAY_IDLE_EVENT 0xF118 +#define CFS_KFI_REPLAY_RX_HELLO_REQ 0xF119 +#define CFS_KFI_FAIL_MSG_TYPE_EAGAIN 0xF11A /* Maximum number of transaction keys supported. */ #define KFILND_EP_KEY_BITS 16U @@ -241,6 +243,12 @@ struct kfilnd_ep { #define KP_STATE_STALE 0x3 /* We suspect this peer is actually down or otherwise unreachable */ #define KP_STATE_DOWN 0x4 +/* We received a HELLO request from a new peer, and are waiting + * for the response to our HELLO request. We can handle RX events for + * such a peer, but we will throttle sends to this peer until it is + * up-to-date + */ +#define KP_STATE_WAIT_RSP 0x5 struct kfilnd_peer { struct rhash_head kp_node; @@ -286,11 +294,12 @@ static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp) return atomic_read(&kp->kp_state) == KP_STATE_NEW; } +/* We need to throttle messages if the peer is not up-to-date or stale */ static inline bool kfilnd_peer_needs_throttle(struct kfilnd_peer *kp) { unsigned int kp_state = atomic_read(&kp->kp_state); - return (kp_state == KP_STATE_NEW || kp_state == KP_STATE_DOWN); + return !(kp_state == KP_STATE_UPTODATE || kp_state == KP_STATE_STALE); } /* Peer needs hello if it is not up to date and there is not already a hello diff --git a/lnet/klnds/kfilnd/kfilnd_ep.c b/lnet/klnds/kfilnd/kfilnd_ep.c index 904295a..18f53fc 100644 --- a/lnet/klnds/kfilnd/kfilnd_ep.c +++ b/lnet/klnds/kfilnd/kfilnd_ep.c @@ -406,7 +406,9 @@ int kfilnd_ep_post_send(struct kfilnd_ep *ep, struct kfilnd_transaction *tn) return 0; } else if (CFS_FAIL_CHECK(CFS_KFI_FAIL_SEND)) { return -EIO; - } else if (CFS_FAIL_CHECK(CFS_KFI_FAIL_SEND_EAGAIN)) { + } else if (CFS_FAIL_CHECK_VALUE(CFS_KFI_FAIL_MSG_TYPE_EAGAIN, + tn->tn_tx_msg.msg->type) || + CFS_FAIL_CHECK(CFS_KFI_FAIL_SEND_EAGAIN)) { return -EAGAIN; } diff --git a/lnet/klnds/kfilnd/kfilnd_peer.c b/lnet/klnds/kfilnd/kfilnd_peer.c index 2c8ab95..6c66739 100644 --- a/lnet/klnds/kfilnd/kfilnd_peer.c +++ b/lnet/klnds/kfilnd/kfilnd_peer.c @@ -364,21 +364,17 @@ void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg) libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr, msg->proto.hello.version, KFILND_MSG_VERSION, kp->kp_version); - } else { + if (atomic_cmpxchg(&kp->kp_state, KP_STATE_NEW, + KP_STATE_WAIT_RSP) == KP_STATE_NEW) + CDEBUG(D_NET, "Peer %s(%p):0x%llx new -> wait response\n", + libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr); + } else if (msg->type == KFILND_MSG_HELLO_RSP) { kp->kp_version = msg->proto.hello.version; - CDEBUG(D_NET,"Peer %s(%p):0x%llx negotiated version: %u\n", + atomic_set(&kp->kp_state, KP_STATE_UPTODATE); + CDEBUG(D_NET, + "Peer %s(%p):0x%llx is up-to-date negotiated version: %u\n", libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr, msg->proto.hello.version); - } - - atomic_set(&kp->kp_state, KP_STATE_UPTODATE); - CDEBUG(D_NET, "kp %s(%p):0x%llx is up-to-date\n", - libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr); - - /* Clear kp_hello_state if we've received the hello response, - * otherwise this is an incoming hello request and we may have our - * own hello request to this peer still outstanding - */ - if (msg->type == KFILND_MSG_HELLO_RSP) kfilnd_peer_clear_hello_state(kp); + } } diff --git a/lnet/klnds/kfilnd/kfilnd_tn.c b/lnet/klnds/kfilnd/kfilnd_tn.c index eba1077..2ab71e8 100644 --- a/lnet/klnds/kfilnd/kfilnd_tn.c +++ b/lnet/klnds/kfilnd/kfilnd_tn.c @@ -846,6 +846,22 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn, switch (msg->type) { case KFILND_MSG_HELLO_REQ: + if (CFS_FAIL_CHECK(CFS_KFI_REPLAY_RX_HELLO_REQ)) { + CDEBUG(D_NET, "Replay RX HELLO_REQ\n"); + return -EAGAIN; + } + + if (kfilnd_peer_is_new_peer(tn->tn_kp)) { + rc = kfilnd_send_hello_request(tn->tn_ep->end_dev, + tn->tn_ep->end_cpt, + tn->tn_kp); + if (rc) + KFILND_TN_ERROR(tn, + "Failed to send hello request: rc=%d", + rc); + rc = 0; + } + kfilnd_peer_process_hello(tn->tn_kp, msg); tn->tn_target_addr = kfilnd_peer_get_kfi_addr(tn->tn_kp); KFILND_TN_DEBUG(tn, "Using peer %s(%#llx)", -- 1.8.3.1