Whamcloud - gitweb
LU-16214 kfilnd: Keep stale peer entries 85/48785/4
Author: Chris Horn <chris.horn@hpe.com>
Fri, 19 Aug 2022 20:27:26 +0000 (14:27 -0600)
Committer: Oleg Drokin <green@whamcloud.com>
Thu, 19 Jan 2023 15:30:35 +0000 (15:30 +0000)
A peer is currently removed from the cache whenever there is a network
failure associated with the peer. This leads to situations where
incoming messages from that peer will be dropped until a handshake
can be completed.

If we instead keep these stale peer entries then we at least have a
chance of completing future transactions with the peer.

To accomplish this, we introduce states to struct kfilnd_peer.

When a kfilnd_peer is newly allocated it is assigned a state of
KP_STATE_NEW. kfilnd_peer_is_new_peer() is modified to check for this
state rather than check if kp_version is set.

When a handshake is completed the peer is assigned a state of
KP_STATE_UPTODATE.

When a peer that is up-to-date experiences a failed network operation
then it is assigned a state of KP_STATE_STALE. kfilnd_peer_stale() is
introduced to set this state. Existing callers of kfilnd_peer_down()
are converted to call kfilnd_peer_stale(). kfilnd_peer_down() is
renamed to kfilnd_peer_del().

We will initiate a handshake to any peer that is in either
KP_STATE_NEW or KP_STATE_STALE. kfilnd_peer_needs_hello() is
modified accordingly.

struct kfilnd_peer::kp_last_alive is checked by kfilnd_peer_stale().
If we haven't heard from a stale peer within five LND timeout periods,
then that peer is deleted.

An additional kfilnd_peer_alive() call is added to
kfilnd_tn_state_idle() for the TN_EVENT_RX_HELLO case, so that
peer aliveness is updated when we receive a hello request or response.

HPE-bug-id: LUS-11125
Test-Parameters: trivial
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Icfb722e58fa334d983df02742dc456a55ac2abc3
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48785
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Ian Ziemba <ian.ziemba@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Ron Gredvig <ron.gredvig@hpe.com>
lnet/klnds/kfilnd/kfilnd.c
lnet/klnds/kfilnd/kfilnd.h
lnet/klnds/kfilnd/kfilnd_peer.c
lnet/klnds/kfilnd/kfilnd_peer.h
lnet/klnds/kfilnd/kfilnd_tn.c

index d926b3f..e4efdd4 100644 (file)
@@ -161,7 +161,10 @@ static int kfilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *msg)
 
        if (kfilnd_peer_needs_hello(tn->tn_kp)) {
                rc = kfilnd_send_hello_request(dev, cpt, tn->tn_kp);
-               if (rc) {
+               if (rc && kfilnd_peer_is_new_peer(tn->tn_kp)) {
+                       /* Only fail the send if this is a new peer. Otherwise
+                        * attempt the send using our stale peer information
+                        */
                        kfilnd_tn_free(tn);
                        return rc;
                }
index c027d7a..4c21532 100644 (file)
@@ -221,6 +221,13 @@ struct kfilnd_ep {
        struct kfilnd_immediate_buffer end_immed_bufs[];
 };
 
+/* Newly allocated peer */
+#define KP_STATE_NEW 0x1
+/* Peer after successful hello handshake */
+#define KP_STATE_UPTODATE 0x2
+/* Peer experienced some sort of network failure */
+#define KP_STATE_STALE 0x3
+
 struct kfilnd_peer {
        struct rhash_head kp_node;
        struct rcu_head kp_rcu_head;
@@ -236,6 +243,7 @@ struct kfilnd_peer {
        u32 kp_remote_session_key;
        atomic_t kp_hello_pending;
        time64_t kp_hello_ts;
+       atomic_t kp_state;
 };
 
 static inline bool kfilnd_peer_deleted(struct kfilnd_peer *kp)
@@ -259,7 +267,7 @@ static inline void kfilnd_peer_clear_hello_pending(struct kfilnd_peer *kp)
 
 static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 {
-       return kp->kp_version == 0;
+       return atomic_read(&kp->kp_state) == KP_STATE_NEW;
 }
 
 /* Peer needs hello if it is not up to date and there is not already a hello
@@ -271,7 +279,7 @@ static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 static inline bool kfilnd_peer_needs_hello(struct kfilnd_peer *kp)
 {
        if (atomic_read(&kp->kp_hello_pending) == 0) {
-               if (kfilnd_peer_is_new_peer(kp))
+               if (atomic_read(&kp->kp_state) != KP_STATE_UPTODATE)
                        return true;
        } else if (ktime_before(kp->kp_hello_ts + lnet_get_lnd_timeout(),
                                ktime_get_seconds())) {
index e6eb8c4..d65a21b 100644 (file)
@@ -56,10 +56,33 @@ static void kfilnd_peer_free(void *ptr, void *arg)
 }
 
 /**
- * kfilnd_peer_down() - Mark a peer as down.
- * @kp: Peer to be downed.
+ * kfilnd_peer_stale() - Mark a peer as stale.
+ * @kp: Peer to be marked stale
+ * Note: only "up-to-date" peers can be marked stale. If we haven't completed
+ * a transaction with this peer within 5 LND timeouts then delete this peer.
  */
-void kfilnd_peer_down(struct kfilnd_peer *kp)
+void kfilnd_peer_stale(struct kfilnd_peer *kp)
+{
+       if (atomic_cmpxchg(&kp->kp_state,
+                          KP_STATE_UPTODATE,
+                          KP_STATE_STALE) == KP_STATE_UPTODATE) {
+               CDEBUG(D_NET, "%s(%p):0x%llx is stale\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+       } else if (ktime_before(kp->kp_last_alive + lnet_get_lnd_timeout() * 5,
+                              ktime_get_seconds())) {
+               CDEBUG(D_NET,
+                      "Haven't heard from %s(%p):0x%llx in %lld seconds\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
+                      ktime_sub(ktime_get_seconds(), kp->kp_last_alive));
+               kfilnd_peer_del(kp);
+       }
+}
+
+/**
+ * kfilnd_peer_del() - Mark a peer for deletion
+ * @kp: Peer to be deleted
+ */
+void kfilnd_peer_del(struct kfilnd_peer *kp)
 {
        if (atomic_cmpxchg(&kp->kp_remove_peer, 0, 1) == 0) {
                struct lnet_nid peer_nid;
@@ -173,6 +196,7 @@ again:
        atomic_set(&kp->kp_rx_base, 0);
        atomic_set(&kp->kp_remove_peer, 0);
        atomic_set(&kp->kp_hello_pending, 0);
+       atomic_set(&kp->kp_state, KP_STATE_NEW);
        kp->kp_local_session_key = kfilnd_dev_get_session_key(dev);
        kp->kp_hello_ts = ktime_get_seconds();
 
@@ -291,6 +315,10 @@ void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg)
                       msg->proto.hello.version);
        }
 
+       atomic_set(&kp->kp_state, KP_STATE_UPTODATE);
+       CDEBUG(D_NET, "kp %s(%p):0x%llx is up-to-date\n",
+              libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+
        /* Clear kp_hello_pending if we've received the hello response,
         * otherwise this is an incoming hello request and we may have our
         * own hello request to this peer still outstanding
index 91cdc1b..612c883 100644 (file)
@@ -34,7 +34,8 @@
 
 #include "kfilnd.h"
 
-void kfilnd_peer_down(struct kfilnd_peer *kp);
+void kfilnd_peer_stale(struct kfilnd_peer *kp);
+void kfilnd_peer_del(struct kfilnd_peer *kp);
 void kfilnd_peer_put(struct kfilnd_peer *kp);
 struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid);
 void kfilnd_peer_alive(struct kfilnd_peer *kp);
index b9f94aa..9518672 100644 (file)
@@ -793,6 +793,8 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
        case TN_EVENT_RX_HELLO:
                msg = tn->tn_rx_msg.msg;
 
+               kfilnd_peer_alive(tn->tn_kp);
+
                switch (msg->type) {
                case KFILND_MSG_HELLO_REQ:
                        kfilnd_peer_process_hello(tn->tn_kp, msg);
@@ -871,7 +873,7 @@ static int kfilnd_tn_state_imm_send(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                if (tn->msg_type == KFILND_MSG_HELLO_REQ)
                        kfilnd_peer_clear_hello_pending(tn->tn_kp);
                break;
@@ -1053,7 +1055,7 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
 
                /* Need to cancel the tagged receive to prevent resources from
                 * being leaked.
@@ -1137,7 +1139,7 @@ static int kfilnd_tn_state_wait_tag_rma_comp(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        default:
@@ -1218,7 +1220,7 @@ static int kfilnd_tn_state_wait_tag_comp(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        case TN_EVENT_TAG_TX_OK:
@@ -1244,7 +1246,7 @@ static int kfilnd_tn_state_fail(struct kfilnd_transaction *tn,
 
        switch (event) {
        case TN_EVENT_TX_FAIL:
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        case TN_EVENT_TX_OK:
@@ -1276,7 +1278,7 @@ static int kfilnd_tn_state_wait_timeout_tag_comp(struct kfilnd_transaction *tn,
        case TN_EVENT_TAG_RX_CANCEL:
                kfilnd_tn_status_update(tn, -ETIMEDOUT,
                                        LNET_MSG_STATUS_REMOTE_TIMEOUT);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        case TN_EVENT_TAG_RX_FAIL: