LU-16451 kfilnd: Throttle traffic to down peers

author Chris Horn <chris.horn@hpe.com>

Fri, 28 Oct 2022 22:27:17 +0000 (16:27 -0600)

committer Oleg Drokin <green@whamcloud.com>

Thu, 19 Jan 2023 15:32:56 +0000 (15:32 +0000)
author Chris Horn <chris.horn@hpe.com>
Fri, 28 Oct 2022 22:27:17 +0000 (16:27 -0600)
committer Oleg Drokin <green@whamcloud.com>
Thu, 19 Jan 2023 15:32:56 +0000 (15:32 +0000)
diff --git a/lnet/klnds/kfilnd/kfilnd.h b/lnet/klnds/kfilnd/kfilnd.h

index b8243d8..3173959 100644 (file)
--- a/lnet/klnds/kfilnd/kfilnd.h
+++ b/lnet/klnds/kfilnd/kfilnd.h
@@ -227,6 +227,8 @@ struct kfilnd_ep {
  #define KP_STATE_UPTODATE 0x2
  /* Peer experienced some sort of network failure */
  #define KP_STATE_STALE 0x3
+/* We suspect this peer is actually down or otherwise unreachable */
+#define KP_STATE_DOWN 0x4
  
  struct kfilnd_peer {
         struct rhash_head kp_node;
@@ -270,6 +272,13 @@ static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
         return atomic_read(&kp->kp_state) == KP_STATE_NEW;
  }
  
+static inline bool kfilnd_peer_needs_throttle(struct kfilnd_peer *kp)
+{
+       unsigned int kp_state = atomic_read(&kp->kp_state);
+
+       return (kp_state == KP_STATE_NEW || kp_state == KP_STATE_DOWN);
+}
+
  /* Peer needs hello if it is not up to date and there is not already a hello
   * in flight.
   *
diff --git a/lnet/klnds/kfilnd/kfilnd_peer.c b/lnet/klnds/kfilnd/kfilnd_peer.c

index d65a21b..412d2ee 100644 (file)
--- a/lnet/klnds/kfilnd/kfilnd_peer.c
+++ b/lnet/klnds/kfilnd/kfilnd_peer.c
@@ -56,20 +56,14 @@ static void kfilnd_peer_free(void *ptr, void *arg)
  }
  
  /**
- * kfilnd_peer_stale() - Mark a peer as stale.
- * @kp: Peer to be marked stale
- * Note: only "up-to-date" peers can be marked stale. If we haven't completed
- * a transaction with this peer within 5 LND timeouts then delete this peer.
+ * kfilnd_peer_purge_old_peer() - Delete the specified peer from the cache
+ * if we haven't heard from it within 5x LND timeouts.
+ * @kp: The peer to be checked or purged
   */
-void kfilnd_peer_stale(struct kfilnd_peer *kp)
+static void kfilnd_peer_purge_old_peer(struct kfilnd_peer *kp)
  {
-       if (atomic_cmpxchg(&kp->kp_state,
-                          KP_STATE_UPTODATE,
-                          KP_STATE_STALE) == KP_STATE_UPTODATE) {
-               CDEBUG(D_NET, "%s(%p):0x%llx is stale\n",
-                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
-       } else if (ktime_before(kp->kp_last_alive + lnet_get_lnd_timeout() * 5,
-                              ktime_get_seconds())) {
+       if (ktime_after(ktime_get_seconds(),
+                       kp->kp_last_alive + (lnet_get_lnd_timeout() * 5))) {
                 CDEBUG(D_NET,
                        "Haven't heard from %s(%p):0x%llx in %lld seconds\n",
                        libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
@@ -79,6 +73,63 @@ void kfilnd_peer_stale(struct kfilnd_peer *kp)
  }
  
  /**
+ * kfilnd_peer_stale() - Mark a peer as stale. If the peer is already stale then
+ * check whether it should be deleted.
+ * @kp: Peer to be marked stale
+ * Note: only "up-to-date" peers can be marked stale.
+ */
+static void kfilnd_peer_stale(struct kfilnd_peer *kp)
+{
+       if (atomic_cmpxchg(&kp->kp_state,
+                          KP_STATE_UPTODATE,
+                          KP_STATE_STALE) == KP_STATE_UPTODATE) {
+               CDEBUG(D_NET, "%s(%p):0x%llx uptodate -> stale\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+       } else {
+               kfilnd_peer_purge_old_peer(kp);
+       }
+}
+
+/**
+ * kfilnd_peer_down() - Mark a peer as down. If the peer is already down then
+ * check whether it should be deleted.
+ * @kp: Peer to be marked down
+ * Note: Only peers that are "up-to-date" or "stale" can be marked down.
+ */
+static void kfilnd_peer_down(struct kfilnd_peer *kp)
+{
+       if (atomic_read(&kp->kp_state) == KP_STATE_DOWN) {
+               kfilnd_peer_purge_old_peer(kp);
+       } else if (atomic_cmpxchg(&kp->kp_state,
+                                 KP_STATE_UPTODATE,
+                                 KP_STATE_DOWN) == KP_STATE_UPTODATE) {
+               CDEBUG(D_NET, "%s(%p):0x%llx uptodate -> down\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+       } else if (atomic_cmpxchg(&kp->kp_state,
+                                 KP_STATE_STALE,
+                                 KP_STATE_DOWN) == KP_STATE_STALE) {
+               CDEBUG(D_NET, "%s(%p):0x%llx stale -> down\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
+       }
+}
+
+/**
+ * kfilnd_peer_tn_failed() - A transaction with this peer has failed. Mark the
+ * peer as either stale or down depending on the provided error value.
+ * @kp: The peer to be marked down or stale
+ * @error: An errno indicating why the transaction failed.
+ * Note: We currently only consider EHOSTUNREACH which corresponds to
+ * C_RC_UNDELIVERABLE, and ENOTCONN which corresponds to C_RC_VNI_NOT_FOUND.
+ */
+void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error)
+{
+       if (error == -EHOSTUNREACH || error == -ENOTCONN)
+               kfilnd_peer_down(kp);
+       else
+               kfilnd_peer_stale(kp);
+}
+
+/**
   * kfilnd_peer_del() - Mark a peer for deletion
   * @kp: Peer to be deleted
   */
diff --git a/lnet/klnds/kfilnd/kfilnd_peer.h b/lnet/klnds/kfilnd/kfilnd_peer.h

index 612c883..963d802 100644 (file)
--- a/lnet/klnds/kfilnd/kfilnd_peer.h
+++ b/lnet/klnds/kfilnd/kfilnd_peer.h
@@ -34,7 +34,6 @@
  
  #include "kfilnd.h"
  
-void kfilnd_peer_stale(struct kfilnd_peer *kp);
  void kfilnd_peer_del(struct kfilnd_peer *kp);
  void kfilnd_peer_put(struct kfilnd_peer *kp);
  struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid);
@@ -44,5 +43,6 @@ void kfilnd_peer_init(struct kfilnd_dev *dev);
  kfi_addr_t kfilnd_peer_get_kfi_addr(struct kfilnd_peer *kp);
  u16 kfilnd_peer_target_rx_base(struct kfilnd_peer *kp);
  void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg);
+void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error);
  
  #endif /* _KFILND_PEER_ */
diff --git a/lnet/klnds/kfilnd/kfilnd_tn.c b/lnet/klnds/kfilnd/kfilnd_tn.c

index 9666d85..a442b70 100644 (file)
--- a/lnet/klnds/kfilnd/kfilnd_tn.c
+++ b/lnet/klnds/kfilnd/kfilnd_tn.c
@@ -623,7 +623,7 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
         /* For new peers, send a hello request message and queue the true LNet
          * message for replay.
          */
-       if (kfilnd_peer_is_new_peer(tn->tn_kp) &&
+       if (kfilnd_peer_needs_throttle(tn->tn_kp) &&
             (event == TN_EVENT_INIT_IMMEDIATE || event == TN_EVENT_INIT_BULK)) {
                 if (kfilnd_peer_deleted(tn->tn_kp)) {
                         /* We'll assign a NETWORK_TIMEOUT message health status
@@ -631,23 +631,19 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
                          * for removal
                          */
                         rc = -ESTALE;
-                       KFILND_TN_DEBUG(tn,
-                                       "Dropping message to stale peer %s\n",
-                                       libcfs_nid2str(tn->tn_kp->kp_nid));
+                       KFILND_TN_DEBUG(tn, "Drop message to deleted peer");
                 } else if (kfilnd_peer_needs_hello(tn->tn_kp, false)) {
-                       /* This transaction was setup against a new peer, which
-                        * implies a HELLO was sent. If a HELLO is no longer
-                        * in flight then that means it has failed, and we
-                        * should cancel this TN. Otherwise we are stuck
-                        * waiting for the TN deadline.
+                       /* We're throttling transactions to this peer until
+                        * a handshake can be completed, but there is no HELLO
+                        * currently in flight. This implies the HELLO has
+                        * failed, and we should cancel this TN. Otherwise we
+                        * are stuck waiting for the TN deadline.
                          *
                          * We assign NETWORK_TIMEOUT health status below because
                          * we do not know why the HELLO failed.
                          */
                         rc = -ECANCELED;
-                       KFILND_TN_DEBUG(tn,
-                                       "Peer is new but there is no outstanding hello %s\n",
-                                       libcfs_nid2str(tn->tn_kp->kp_nid));
+                       KFILND_TN_DEBUG(tn, "Cancel throttled TN");
                 } else if (ktime_after(tn->deadline, ktime_get_seconds())) {
                         /* If transaction deadline has not been met, return
                          * -EAGAIN. This will cause this transaction event to be
@@ -655,8 +651,7 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
                          * peer should occur at which point the kfilnd version
                          * should be negotiated.
                          */
-                       KFILND_TN_DEBUG(tn, "%s hello response pending",
-                                       libcfs_nid2str(tn->tn_kp->kp_nid));
+                       KFILND_TN_DEBUG(tn, "hello response pending");
                         return -EAGAIN;
                 } else {
                         rc = -ETIMEDOUT;
@@ -887,7 +882,7 @@ static int kfilnd_tn_state_imm_send(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
  
                 kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_stale(tn->tn_kp);
+               kfilnd_peer_tn_failed(tn->tn_kp, status);
                 if (tn->msg_type == KFILND_MSG_HELLO_REQ)
                         kfilnd_peer_clear_hello_pending(tn->tn_kp);
                 break;
@@ -1069,7 +1064,7 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
  
                 kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_stale(tn->tn_kp);
+               kfilnd_peer_tn_failed(tn->tn_kp, status);
  
                 /* Need to cancel the tagged receive to prevent resources from
                  * being leaked.
@@ -1153,7 +1148,7 @@ static int kfilnd_tn_state_wait_tag_rma_comp(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
  
                 kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_stale(tn->tn_kp);
+               kfilnd_peer_tn_failed(tn->tn_kp, status);
                 break;
  
         default:
@@ -1234,7 +1229,7 @@ static int kfilnd_tn_state_wait_tag_comp(struct kfilnd_transaction *tn,
                         hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
  
                 kfilnd_tn_status_update(tn, status, hstatus);
-               kfilnd_peer_stale(tn->tn_kp);
+               kfilnd_peer_tn_failed(tn->tn_kp, status);
                 break;
  
         case TN_EVENT_TAG_TX_OK:
@@ -1260,7 +1255,7 @@ static int kfilnd_tn_state_fail(struct kfilnd_transaction *tn,
  
         switch (event) {
         case TN_EVENT_TX_FAIL:
-               kfilnd_peer_stale(tn->tn_kp);
+               kfilnd_peer_tn_failed(tn->tn_kp, status);
                 break;
  
         case TN_EVENT_TX_OK:
@@ -1292,7 +1287,7 @@ static int kfilnd_tn_state_wait_timeout_tag_comp(struct kfilnd_transaction *tn,
         case TN_EVENT_TAG_RX_CANCEL:
                 kfilnd_tn_status_update(tn, -ETIMEDOUT,
                                         LNET_MSG_STATUS_REMOTE_TIMEOUT);
-               kfilnd_peer_stale(tn->tn_kp);
+               kfilnd_peer_tn_failed(tn->tn_kp, -ETIMEDOUT);
                 break;
  
         case TN_EVENT_TAG_RX_FAIL:
author	Chris Horn <chris.horn@hpe.com>
	Fri, 28 Oct 2022 22:27:17 +0000 (16:27 -0600)
committer	Oleg Drokin <green@whamcloud.com>
	Thu, 19 Jan 2023 15:32:56 +0000 (15:32 +0000)
lnet/klnds/kfilnd/kfilnd.h		patch \| blob \| history
lnet/klnds/kfilnd/kfilnd_peer.c		patch \| blob \| history
lnet/klnds/kfilnd/kfilnd_peer.h		patch \| blob \| history
lnet/klnds/kfilnd/kfilnd_tn.c		patch \| blob \| history