Whamcloud - gitweb
LU-16214 kfilnd: Keep stale peer entries 85/48785/4
Author: Chris Horn <chris.horn@hpe.com>
Fri, 19 Aug 2022 20:27:26 +0000 (14:27 -0600)
Committer: Oleg Drokin <green@whamcloud.com>
Thu, 19 Jan 2023 15:30:35 +0000 (15:30 +0000)
A peer is currently removed from the cache whenever there is a network
failure associated with the peer. This leads to situations where
incoming messages from that peer will be dropped until a handshake
can be completed.

If we instead keep these stale peer entries then we at least have a
chance of completing future transactions with the peer.

To accomplish this, we introduce states to struct kfilnd_peer.

When a kfilnd_peer is newly allocated it is assigned a state of
KP_STATE_NEW. kfilnd_peer_is_new_peer() is modified to check for this
state rather than check if kp_version is set.

When a handshake is completed the peer is assigned a state of
KP_STATE_UPTODATE.

When a peer that is up-to-date experiences a failed network operation
then it is assigned a state of KP_STATE_STALE. kfilnd_peer_stale() is
introduced to set this state. Existing callers of kfilnd_peer_down()
are converted to call kfilnd_peer_stale(). kfilnd_peer_down() is
renamed to kfilnd_peer_del().

We will initiate a handshake to any peer that is in either
KP_STATE_NEW or KP_STATE_STALE. kfilnd_peer_needs_hello() is
modified accordingly.

struct kfilnd_peer::kp_last_alive is checked by kfilnd_peer_stale().
If we haven't heard from a stale peer within five LND timeout periods,
then that peer is deleted.

An additional kfilnd_peer_alive() call is added to
kfilnd_tn_state_idle() for the TN_EVENT_RX_HELLO case, so that
peer aliveness is updated when we receive a hello request or response.

HPE-bug-id: LUS-11125
Test-Parameters: trivial
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Icfb722e58fa334d983df02742dc456a55ac2abc3
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48785
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Ian Ziemba <ian.ziemba@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Ron Gredvig <ron.gredvig@hpe.com>
lnet/klnds/kfilnd/kfilnd.c
lnet/klnds/kfilnd/kfilnd.h
lnet/klnds/kfilnd/kfilnd_peer.c
lnet/klnds/kfilnd/kfilnd_peer.h
lnet/klnds/kfilnd/kfilnd_tn.c

index d926b3f..e4efdd4 100644 (file)
@@ -161,7 +161,10 @@ static int kfilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *msg)
 
        if (kfilnd_peer_needs_hello(tn->tn_kp)) {
                rc = kfilnd_send_hello_request(dev, cpt, tn->tn_kp);
-               if (rc) {
+               if (rc && kfilnd_peer_is_new_peer(tn->tn_kp)) {
+                       /* Only fail the send if this is a new peer. Otherwise
+                        * attempt the send using our stale peer information
+                        */
                        kfilnd_tn_free(tn);
                        return rc;
                }
index c027d7a..4c21532 100644 (file)
@@ -221,6 +221,13 @@ struct kfilnd_ep {
        struct kfilnd_immediate_buffer end_immed_bufs[];
 };
 
+/* Newly allocated peer */
+#define KP_STATE_NEW 0x1
+/* Peer after successful hello handshake */
+#define KP_STATE_UPTODATE 0x2
+/* Peer experienced some sort of network failure */
+#define KP_STATE_STALE 0x3
+
 struct kfilnd_peer {
        struct rhash_head kp_node;
        struct rcu_head kp_rcu_head;
@@ -236,6 +243,7 @@ struct kfilnd_peer {
        u32 kp_remote_session_key;
        atomic_t kp_hello_pending;
        time64_t kp_hello_ts;
+       atomic_t kp_state;
 };
 
 static inline bool kfilnd_peer_deleted(struct kfilnd_peer *kp)
@@ -259,7 +267,7 @@ static inline void kfilnd_peer_clear_hello_pending(struct kfilnd_peer *kp)
 
 static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 {
-       return kp->kp_version == 0;
+       return atomic_read(&kp->kp_state) == KP_STATE_NEW;
 }
 
 /* Peer needs hello if it is not up to date and there is not already a hello
@@ -271,7 +279,7 @@ static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 static inline bool kfilnd_peer_needs_hello(struct kfilnd_peer *kp)
 {
        if (atomic_read(&kp->kp_hello_pending) == 0) {
-               if (kfilnd_peer_is_new_peer(kp))
+               if (atomic_read(&kp->kp_state) != KP_STATE_UPTODATE)
                        return true;
        } else if (ktime_before(kp->kp_hello_ts + lnet_get_lnd_timeout(),
                                ktime_get_seconds())) {
index e6eb8c4..d65a21b 100644 (file)
@@ -56,10 +56,33 @@ static void kfilnd_peer_free(void *ptr, void *arg)
 }
 
 /**
- * kfilnd_peer_down() - Mark a peer as down.
- * @kp: Peer to be downed.
+ * kfilnd_peer_stale() - Mark a peer as stale.
+ * @kp: Peer to be marked stale
+ * Note: only "up-to-date" peers can be marked stale. If we haven't completed
+ * a transaction with this peer within 5 LND timeouts then delete this peer.
  */
-void kfilnd_peer_down(struct kfilnd_peer *kp)
+void kfilnd_peer_stale(struct kfilnd_peer *kp)
+{
+       if (atomic_cmpxchg(&kp->kp_state,
+                          KP_STATE_UPTODATE,
+                          KP_STATE_STALE) == KP_STATE_UPTODATE) {
+               CDEBUG(D_NET, "%s(%p):0x%llx is stale\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+       } else if (ktime_before(kp->kp_last_alive + lnet_get_lnd_timeout() * 5,
+                              ktime_get_seconds())) {
+               CDEBUG(D_NET,
+                      "Haven't heard from %s(%p):0x%llx in %lld seconds\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
+                      ktime_sub(ktime_get_seconds(), kp->kp_last_alive));
+               kfilnd_peer_del(kp);
+       }
+}
+
+/**
+ * kfilnd_peer_del() - Mark a peer for deletion
+ * @kp: Peer to be deleted
+ */
+void kfilnd_peer_del(struct kfilnd_peer *kp)
 {
        if (atomic_cmpxchg(&kp->kp_remove_peer, 0, 1) == 0) {
                struct lnet_nid peer_nid;
@@ -173,6 +196,7 @@ again:
        atomic_set(&kp->kp_rx_base, 0);
        atomic_set(&kp->kp_remove_peer, 0);
        atomic_set(&kp->kp_hello_pending, 0);
+       atomic_set(&kp->kp_state, KP_STATE_NEW);
        kp->kp_local_session_key = kfilnd_dev_get_session_key(dev);
        kp->kp_hello_ts = ktime_get_seconds();
 
@@ -291,6 +315,10 @@ void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg)
                       msg->proto.hello.version);
        }
 
+       atomic_set(&kp->kp_state, KP_STATE_UPTODATE);
+       CDEBUG(D_NET, "kp %s(%p):0x%llx is up-to-date\n",
+              libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+
        /* Clear kp_hello_pending if we've received the hello response,
         * otherwise this is an incoming hello request and we may have our
         * own hello request to this peer still outstanding
index 91cdc1b..612c883 100644 (file)
@@ -34,7 +34,8 @@
 
 #include "kfilnd.h"
 
-void kfilnd_peer_down(struct kfilnd_peer *kp);
+void kfilnd_peer_stale(struct kfilnd_peer *kp);
+void kfilnd_peer_del(struct kfilnd_peer *kp);
 void kfilnd_peer_put(struct kfilnd_peer *kp);
 struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid);
 void kfilnd_peer_alive(struct kfilnd_peer *kp);
index b9f94aa..9518672 100644 (file)
@@ -793,6 +793,8 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
        case TN_EVENT_RX_HELLO:
                msg = tn->tn_rx_msg.msg;
 
+               kfilnd_peer_alive(tn->tn_kp);
+
                switch (msg->type) {
                case KFILND_MSG_HELLO_REQ:
                        kfilnd_peer_process_hello(tn->tn_kp, msg);
@@ -871,7 +873,7 @@ static int kfilnd_tn_state_imm_send(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                if (tn->msg_type == KFILND_MSG_HELLO_REQ)
                        kfilnd_peer_clear_hello_pending(tn->tn_kp);
                break;
@@ -1053,7 +1055,7 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
 
                /* Need to cancel the tagged receive to prevent resources from
                 * being leaked.
@@ -1137,7 +1139,7 @@ static int kfilnd_tn_state_wait_tag_rma_comp(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        default:
@@ -1218,7 +1220,7 @@ static int kfilnd_tn_state_wait_tag_comp(struct kfilnd_transaction *tn,
                        hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
 
                kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        case TN_EVENT_TAG_TX_OK:
@@ -1244,7 +1246,7 @@ static int kfilnd_tn_state_fail(struct kfilnd_transaction *tn,
 
        switch (event) {
        case TN_EVENT_TX_FAIL:
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        case TN_EVENT_TX_OK:
@@ -1276,7 +1278,7 @@ static int kfilnd_tn_state_wait_timeout_tag_comp(struct kfilnd_transaction *tn,
        case TN_EVENT_TAG_RX_CANCEL:
                kfilnd_tn_status_update(tn, -ETIMEDOUT,
                                        LNET_MSG_STATUS_REMOTE_TIMEOUT);
-               kfilnd_peer_down(tn->tn_kp);
+               kfilnd_peer_stale(tn->tn_kp);
                break;
 
        case TN_EVENT_TAG_RX_FAIL: