From: Chris Horn
Date: Fri, 19 Aug 2022 20:27:26 +0000 (-0600)
Subject: LU-16214 kfilnd: Keep stale peer entries
X-Git-Tag: 2.15.54~80
X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F85%2F48785%2F4;p=fs%2Flustre-release.git

LU-16214 kfilnd: Keep stale peer entries

A peer is currently removed from the cache whenever there is a network
failure associated with the peer. This leads to situations where
incoming messages from that peer will be dropped until a handshake can
be completed. If we instead keep these stale peer entries then we at
least have a chance of completing future transactions with the peer.

To accomplish this, we introduce states to struct kfilnd_peer. When a
kfilnd_peer is newly allocated it is assigned a state of KP_STATE_NEW.
kfilnd_peer_is_new_peer() is modified to check for this state rather
than check if kp_version is set. When a handshake is completed the peer
is assigned a state of KP_STATE_UPTODATE. When a peer that is
up-to-date experiences a failed network operation then it is assigned a
state of KP_STATE_STALE. kfilnd_peer_stale() is introduced to set this
state. Existing callers of kfilnd_peer_down() are converted to call
kfilnd_peer_stale(). kfilnd_peer_down() is renamed to kfilnd_peer_del().

We will initiate a handshake to any peer that is in either KP_STATE_NEW
or KP_STATE_STALE. kfilnd_peer_needs_hello() is modified accordingly.

struct kfilnd_peer::kp_last_alive is checked by kfilnd_peer_stale(). If
we haven't heard from a stale peer within five LND timeout periods,
then that peer is deleted.

An additional kfilnd_peer_alive() call is added to
kfilnd_tn_state_idle() for the TN_EVENT_RX_HELLO case, so that peer
aliveness is updated when we receive a hello request or response.

HPE-bug-id: LUS-11125
Test-Parameters: trivial
Signed-off-by: Chris Horn
Change-Id: Icfb722e58fa334d983df02742dc456a55ac2abc3
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48785
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Ian Ziemba
Reviewed-by: Oleg Drokin
Reviewed-by: Ron Gredvig
---

diff --git a/lnet/klnds/kfilnd/kfilnd.c b/lnet/klnds/kfilnd/kfilnd.c
index d926b3f..e4efdd4 100644
--- a/lnet/klnds/kfilnd/kfilnd.c
+++ b/lnet/klnds/kfilnd/kfilnd.c
@@ -161,7 +161,10 @@ static int kfilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *msg)
 
         if (kfilnd_peer_needs_hello(tn->tn_kp)) {
                 rc = kfilnd_send_hello_request(dev, cpt, tn->tn_kp);
-                if (rc) {
+                if (rc && kfilnd_peer_is_new_peer(tn->tn_kp)) {
+                        /* Only fail the send if this is a new peer. Otherwise
+                         * attempt the send using our stale peer information
+                         */
                         kfilnd_tn_free(tn);
                         return rc;
                 }
diff --git a/lnet/klnds/kfilnd/kfilnd.h b/lnet/klnds/kfilnd/kfilnd.h
index c027d7a..4c21532 100644
--- a/lnet/klnds/kfilnd/kfilnd.h
+++ b/lnet/klnds/kfilnd/kfilnd.h
@@ -221,6 +221,13 @@ struct kfilnd_ep {
         struct kfilnd_immediate_buffer end_immed_bufs[];
 };
 
+/* Newly allocated peer */
+#define KP_STATE_NEW 0x1
+/* Peer after successful hello handshake */
+#define KP_STATE_UPTODATE 0x2
+/* Peer experienced some sort of network failure */
+#define KP_STATE_STALE 0x3
+
 struct kfilnd_peer {
         struct rhash_head kp_node;
         struct rcu_head kp_rcu_head;
@@ -236,6 +243,7 @@ struct kfilnd_peer {
         u32 kp_remote_session_key;
         atomic_t kp_hello_pending;
         time64_t kp_hello_ts;
+        atomic_t kp_state;
 };
 
 static inline bool kfilnd_peer_deleted(struct kfilnd_peer *kp)
@@ -259,7 +267,7 @@ static inline void kfilnd_peer_clear_hello_pending(struct kfilnd_peer *kp)
 
 static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 {
-        return kp->kp_version == 0;
+        return atomic_read(&kp->kp_state) == KP_STATE_NEW;
 }
 
 /* Peer needs hello if it is not up to date and there is not already a hello
@@ -271,7 +279,7 @@ static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 static inline bool kfilnd_peer_needs_hello(struct kfilnd_peer *kp)
 {
         if (atomic_read(&kp->kp_hello_pending) == 0) {
-                if (kfilnd_peer_is_new_peer(kp))
+                if (atomic_read(&kp->kp_state) != KP_STATE_UPTODATE)
                         return true;
         } else if (ktime_before(kp->kp_hello_ts + lnet_get_lnd_timeout(),
                                 ktime_get_seconds())) {
diff --git a/lnet/klnds/kfilnd/kfilnd_peer.c b/lnet/klnds/kfilnd/kfilnd_peer.c
index e6eb8c4..d65a21b 100644
--- a/lnet/klnds/kfilnd/kfilnd_peer.c
+++ b/lnet/klnds/kfilnd/kfilnd_peer.c
@@ -56,10 +56,33 @@ static void kfilnd_peer_free(void *ptr, void *arg)
 }
 
 /**
- * kfilnd_peer_down() - Mark a peer as down.
- * @kp: Peer to be downed.
+ * kfilnd_peer_stale() - Mark a peer as stale.
+ * @kp: Peer to be marked stale
+ * Note: only "up-to-date" peers can be marked stale. If we haven't completed
+ * a transaction with this peer within 5 LND timeouts then delete this peer.
  */
-void kfilnd_peer_down(struct kfilnd_peer *kp)
+void kfilnd_peer_stale(struct kfilnd_peer *kp)
+{
+        if (atomic_cmpxchg(&kp->kp_state,
+                           KP_STATE_UPTODATE,
+                           KP_STATE_STALE) == KP_STATE_UPTODATE) {
+                CDEBUG(D_NET, "%s(%p):0x%llx is stale\n",
+                       libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+        } else if (ktime_before(kp->kp_last_alive + lnet_get_lnd_timeout() * 5,
+                                ktime_get_seconds())) {
+                CDEBUG(D_NET,
+                       "Haven't heard from %s(%p):0x%llx in %lld seconds\n",
+                       libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
+                       ktime_sub(ktime_get_seconds(), kp->kp_last_alive));
+                kfilnd_peer_del(kp);
+        }
+}
+
+/**
+ * kfilnd_peer_del() - Mark a peer for deletion
+ * @kp: Peer to be deleted
+ */
+void kfilnd_peer_del(struct kfilnd_peer *kp)
 {
         if (atomic_cmpxchg(&kp->kp_remove_peer, 0, 1) == 0) {
                 struct lnet_nid peer_nid;
@@ -173,6 +196,7 @@ again:
         atomic_set(&kp->kp_rx_base, 0);
         atomic_set(&kp->kp_remove_peer, 0);
         atomic_set(&kp->kp_hello_pending, 0);
+        atomic_set(&kp->kp_state, KP_STATE_NEW);
         kp->kp_local_session_key = kfilnd_dev_get_session_key(dev);
         kp->kp_hello_ts = ktime_get_seconds();
 
@@ -291,6 +315,10 @@ void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg)
                           msg->proto.hello.version);
         }
 
+        atomic_set(&kp->kp_state, KP_STATE_UPTODATE);
+        CDEBUG(D_NET, "kp %s(%p):0x%llx is up-to-date\n",
+               libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+
         /* Clear kp_hello_pending if we've received the hello response,
          * otherwise this is an incoming hello request and we may have our
          * own hello request to this peer still outstanding
diff --git a/lnet/klnds/kfilnd/kfilnd_peer.h b/lnet/klnds/kfilnd/kfilnd_peer.h
index 91cdc1b..612c883 100644
--- a/lnet/klnds/kfilnd/kfilnd_peer.h
+++ b/lnet/klnds/kfilnd/kfilnd_peer.h
@@ -34,7 +34,8 @@
 
 #include "kfilnd.h"
 
-void kfilnd_peer_down(struct kfilnd_peer *kp);
+void kfilnd_peer_stale(struct kfilnd_peer *kp);
+void kfilnd_peer_del(struct kfilnd_peer *kp);
 void kfilnd_peer_put(struct kfilnd_peer *kp);
 struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid);
 void kfilnd_peer_alive(struct kfilnd_peer *kp);
diff --git a/lnet/klnds/kfilnd/kfilnd_tn.c b/lnet/klnds/kfilnd/kfilnd_tn.c
index b9f94aa..9518672 100644
--- a/lnet/klnds/kfilnd/kfilnd_tn.c
+++ b/lnet/klnds/kfilnd/kfilnd_tn.c
@@ -793,6 +793,8 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
         case TN_EVENT_RX_HELLO:
                 msg = tn->tn_rx_msg.msg;
 
+                kfilnd_peer_alive(tn->tn_kp);
+
                 switch (msg->type) {
                 case KFILND_MSG_HELLO_REQ:
                         kfilnd_peer_process_hello(tn->tn_kp, msg);
@@ -871,7 +873,7 @@ static int kfilnd_tn_state_imm_send(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                 kfilnd_tn_status_update(tn, status, hstatus);
-                kfilnd_peer_down(tn->tn_kp);
+                kfilnd_peer_stale(tn->tn_kp);
                 if (tn->msg_type == KFILND_MSG_HELLO_REQ)
                         kfilnd_peer_clear_hello_pending(tn->tn_kp);
                 break;
@@ -1053,7 +1055,7 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                 kfilnd_tn_status_update(tn, status, hstatus);
-                kfilnd_peer_down(tn->tn_kp);
+                kfilnd_peer_stale(tn->tn_kp);
 
                 /* Need to cancel the tagged receive to prevent resources from
                  * being leaked.
@@ -1137,7 +1139,7 @@ static int kfilnd_tn_state_wait_tag_rma_comp(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                 kfilnd_tn_status_update(tn, status, hstatus);
-                kfilnd_peer_down(tn->tn_kp);
+                kfilnd_peer_stale(tn->tn_kp);
                 break;
 
         default:
@@ -1218,7 +1220,7 @@ static int kfilnd_tn_state_wait_tag_comp(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                 kfilnd_tn_status_update(tn, status, hstatus);
-                kfilnd_peer_down(tn->tn_kp);
+                kfilnd_peer_stale(tn->tn_kp);
                 break;
 
         case TN_EVENT_TAG_TX_OK:
@@ -1244,7 +1246,7 @@ static int kfilnd_tn_state_fail(struct kfilnd_transaction *tn,
 
         switch (event) {
         case TN_EVENT_TX_FAIL:
-                kfilnd_peer_down(tn->tn_kp);
+                kfilnd_peer_stale(tn->tn_kp);
                 break;
 
         case TN_EVENT_TX_OK:
@@ -1276,7 +1278,7 @@ static int kfilnd_tn_state_wait_timeout_tag_comp(struct kfilnd_transaction *tn,
         case TN_EVENT_TAG_RX_CANCEL:
                 kfilnd_tn_status_update(tn, -ETIMEDOUT,
                                         LNET_MSG_STATUS_REMOTE_TIMEOUT);
-                kfilnd_peer_down(tn->tn_kp);
+                kfilnd_peer_stale(tn->tn_kp);
                 break;
 
         case TN_EVENT_TAG_RX_FAIL:
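
As a rough illustration of the peer lifecycle described in the commit
message above, the sketch below models the NEW -> UPTODATE -> STALE
transitions and the five-LND-timeout deletion check as a stand-alone
user-space C program. It is not part of the patch: the type and
function names (peer_model, needs_hello, mark_stale) and the fixed
50-second timeout are hypothetical stand-ins for struct kfilnd_peer,
kfilnd_peer_needs_hello(), kfilnd_peer_stale() and
lnet_get_lnd_timeout(), and the real code uses atomics and ktime
helpers rather than plain fields.

/* Illustrative model only -- not kfilnd code. */
#include <stdio.h>
#include <stdbool.h>
#include <time.h>

#define KP_STATE_NEW      0x1   /* newly allocated peer */
#define KP_STATE_UPTODATE 0x2   /* hello handshake completed */
#define KP_STATE_STALE    0x3   /* peer saw a network failure */

/* Hypothetical stand-in for struct kfilnd_peer. */
struct peer_model {
        int state;
        time_t last_alive;      /* last time we heard from the peer */
        bool deleted;
};

/* Assume a fixed LND timeout; the LND asks LNet for the real value. */
static const time_t lnd_timeout = 50;

/* Simplified kfilnd_peer_needs_hello() (pending-hello check omitted):
 * a handshake is needed for NEW or STALE peers.
 */
static bool needs_hello(const struct peer_model *p)
{
        return p->state != KP_STATE_UPTODATE;
}

/* Simplified kfilnd_peer_stale(): only an up-to-date peer goes stale;
 * a peer that has been silent for five LND timeouts is deleted instead.
 */
static void mark_stale(struct peer_model *p, time_t now)
{
        if (p->state == KP_STATE_UPTODATE)
                p->state = KP_STATE_STALE;
        else if (now - p->last_alive > 5 * lnd_timeout)
                p->deleted = true;
}

int main(void)
{
        struct peer_model p = { .state = KP_STATE_NEW };

        printf("new peer needs hello: %d\n", needs_hello(&p));

        /* Handshake completes: peer is now up to date. */
        p.state = KP_STATE_UPTODATE;
        p.last_alive = 100;
        printf("up-to-date peer needs hello: %d\n", needs_hello(&p));

        /* First network failure: the peer is kept, but marked stale. */
        mark_stale(&p, 120);
        printf("stale peer needs hello: %d, deleted: %d\n",
               needs_hello(&p), p.deleted);

        /* Failures continue and the peer stays silent past 5 timeouts. */
        mark_stale(&p, 100 + 5 * lnd_timeout + 1);
        printf("after prolonged silence, deleted: %d\n", p.deleted);

        return 0;
}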