Whamcloud - gitweb
LU-16213 kfilnd: Allow one HELLO in-flight per peer 83/48783/4
authorChris Horn <chris.horn@hpe.com>
Fri, 19 Aug 2022 19:48:37 +0000 (13:48 -0600)
committerOleg Drokin <green@whamcloud.com>
Thu, 19 Jan 2023 15:30:13 +0000 (15:30 +0000)
Allow one HELLO message to be in-flight, per peer, at one time.
Accomplished by adding a flag to struct kfilnd_peer to indicate
whether a hello request has been sent to the peer. Cleared if the
send fails or when the hello response is received.

To detect situation where hello response is never received we add
kp_hello_ts to struct kfilnd_peer to record timestamp of when the
hello request was sent. If this is more than LND timeout seconds in
the past then we may send another hello.

Fix return value of kfilnd_send() when we're unable to allocate a
kfilnd_tn for the hello.

There's some code duplication with updating a peer based on hello
request and response. Consolidate processing of these hello messages
into a single function.

A race exists where a peer can be marked for removal in between a call
to kfilnd_peer_needs_hello() and the call to kfilnd_tn_alloc() inside
kfilnd_send_hello_request(). This would cause a hello request to be
sent to a new peer, created by kfilnd_peer_get() inside
kfilnd_tn_alloc(), without properly setting the kp_hello_pending flag
on that new peer. To avoid this situation, introduce
kfilnd_tn_alloc_for_peer() which takes a struct kfilnd_peer pointer
as an argument to assign to kfilnd_transaction::tn_kp. Use this to
allocate the kfilnd_transaction for the hello request inside
kfilnd_send_hello_request().

HPE-bug-id: LUS-11128
Test-Parameters: trivial
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I6bb0928a629cb398c270366fae6d1040ad67df3f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48783
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Ian Ziemba <ian.ziemba@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Ron Gredvig <ron.gredvig@hpe.com>
lnet/klnds/kfilnd/kfilnd.c
lnet/klnds/kfilnd/kfilnd.h
lnet/klnds/kfilnd/kfilnd_peer.c
lnet/klnds/kfilnd/kfilnd_peer.h
lnet/klnds/kfilnd/kfilnd_tn.c
lnet/klnds/kfilnd/kfilnd_tn.h

index b74f58e..d926b3f 100644 (file)
@@ -65,18 +65,30 @@ static int kfilnd_send_cpt(struct kfilnd_dev *dev, lnet_nid_t nid)
        return  dev->kfd_endpoints[nid % dev->kfd_ni->ni_ncpts]->end_cpt;
 }
 
-int kfilnd_send_hello_request(struct kfilnd_dev *dev, int cpt, lnet_nid_t nid)
+int kfilnd_send_hello_request(struct kfilnd_dev *dev, int cpt,
+                             struct kfilnd_peer *kp)
 {
        struct kfilnd_transaction *tn;
        int rc;
 
-       tn = kfilnd_tn_alloc(dev, cpt, nid, true, true, false);
+       if (kfilnd_peer_set_check_hello_pending(kp)) {
+               CDEBUG(D_NET, "Hello already pending to peer %s(%px)\n",
+                      libcfs_nid2str(kp->kp_nid), kp);
+               return 0;
+       }
+
+       tn = kfilnd_tn_alloc_for_peer(dev, cpt, kp, true, true, false);
        if (IS_ERR(tn)) {
                rc = PTR_ERR(tn);
                CERROR("Failed to allocate transaction struct: rc=%d\n", rc);
+               kfilnd_peer_clear_hello_pending(kp);
                return rc;
        }
 
+       tn->msg_type = KFILND_MSG_HELLO_REQ;
+
+       kp->kp_hello_ts = ktime_get_seconds();
+
        kfilnd_tn_event_handler(tn, TN_EVENT_TX_HELLO, 0);
 
        return 0;
@@ -147,12 +159,11 @@ static int kfilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *msg)
                return rc;
        }
 
-       /* Need to fire off special transaction if this is a new peer. */
-       if (kfilnd_peer_is_new_peer(tn->tn_kp)) {
-               rc = kfilnd_send_hello_request(dev, cpt, tgt_nid4);
+       if (kfilnd_peer_needs_hello(tn->tn_kp)) {
+               rc = kfilnd_send_hello_request(dev, cpt, tn->tn_kp);
                if (rc) {
                        kfilnd_tn_free(tn);
-                       return 0;
+                       return rc;
                }
        }
 
index 64b126e..076085d 100644 (file)
@@ -234,23 +234,53 @@ struct kfilnd_peer {
        u16 kp_version;
        u32 kp_local_session_key;
        u32 kp_remote_session_key;
+       atomic_t kp_hello_pending;
+       time64_t kp_hello_ts;
 };
 
-static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
+/* Sets kp_hello_sending
+ * Returns true if it was already set
+ * Returns false otherwise
+ */
+static inline bool kfilnd_peer_set_check_hello_pending(struct kfilnd_peer *kp)
 {
-       return kp->kp_version == 0;
+       return (atomic_cmpxchg(&kp->kp_hello_pending, 0, 1) == 1);
+}
+
+static inline void kfilnd_peer_clear_hello_pending(struct kfilnd_peer *kp)
+{
+       atomic_set(&kp->kp_hello_pending, 0);
 }
 
-static inline void kfilnd_peer_set_version(struct kfilnd_peer *kp,
-                                          u16 version)
+static inline bool kfilnd_peer_is_new_peer(struct kfilnd_peer *kp)
 {
-       kp->kp_version = version;
+       return kp->kp_version == 0;
 }
 
-static inline void kfilnd_peer_set_remote_session_key(struct kfilnd_peer *kp,
-                                                     u32 session_key)
+/* Peer needs hello if it is not up to date and there is not already a hello
+ * in flight.
+ *
+ * If hello was sent more than LND timeout seconds ago, and we never received a
+ * response, then send another one.
+ */
+static inline bool kfilnd_peer_needs_hello(struct kfilnd_peer *kp)
 {
-       kp->kp_remote_session_key = session_key;
+       if (atomic_read(&kp->kp_hello_pending) == 0) {
+               if (kfilnd_peer_is_new_peer(kp))
+                       return true;
+       } else if (ktime_before(kp->kp_hello_ts + lnet_get_lnd_timeout(),
+                               ktime_get_seconds())) {
+               /* Sent hello but never received reply */
+               CDEBUG(D_NET,
+                      "No response from %s(%p):0x%llx after %lld\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
+                      ktime_sub(ktime_get_seconds(), kp->kp_hello_ts));
+
+               kfilnd_peer_clear_hello_pending(kp);
+               return true;
+       }
+
+       return false;
 }
 
 struct kfilnd_fab {
@@ -700,6 +730,7 @@ struct kfilnd_transaction {
        enum kfilnd_msg_type msg_type;
 };
 
-int kfilnd_send_hello_request(struct kfilnd_dev *dev, int cpt, lnet_nid_t nid);
+int kfilnd_send_hello_request(struct kfilnd_dev *dev, int cpt,
+                             struct kfilnd_peer *kp);
 
 #endif /* _KFILND_ */
index de780a3..e6eb8c4 100644 (file)
@@ -172,7 +172,9 @@ again:
        kp->kp_nid = nid;
        atomic_set(&kp->kp_rx_base, 0);
        atomic_set(&kp->kp_remove_peer, 0);
+       atomic_set(&kp->kp_hello_pending, 0);
        kp->kp_local_session_key = kfilnd_dev_get_session_key(dev);
+       kp->kp_hello_ts = ktime_get_seconds();
 
        /* One reference for the allocation and another for get operation
         * performed for this peer. The allocation reference is returned when
@@ -234,20 +236,6 @@ kfi_addr_t kfilnd_peer_get_kfi_addr(struct kfilnd_peer *kp)
 }
 
 /**
- * kfilnd_peer_update_rx_contexts() - Update the RX context for a peer.
- * @kp: Peer to be updated.
- * @rx_base: New RX base for peer.
- * @rx_count: New RX count for peer.
- */
-void kfilnd_peer_update_rx_contexts(struct kfilnd_peer *kp,
-                                   unsigned int rx_base, unsigned int rx_count)
-{
-       /* TODO: Support RX count. */
-       LASSERT(rx_count > 0);
-       atomic_set(&kp->kp_rx_base, rx_base);
-}
-
-/**
  * kfilnd_peer_alive() - Update when the peer was last alive.
  * @kp: Peer to be updated.
  */
@@ -276,3 +264,37 @@ void kfilnd_peer_init(struct kfilnd_dev *dev)
 {
        rhashtable_init(&dev->peer_cache, &peer_cache_params);
 }
+
+void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg)
+{
+       /* TODO: Support RX count. */
+       LASSERT(msg->proto.hello.rx_count > 0);
+       atomic_set(&kp->kp_rx_base, msg->proto.hello.rx_base);
+
+       kp->kp_remote_session_key = msg->proto.hello.session_key;
+
+       /* If processing an incoming hello request, then negotiate kfilnd
+        * version to the minimum implemented kfilnd version.
+        */
+       if (msg->type == KFILND_MSG_HELLO_REQ) {
+               kp->kp_version = min_t(__u16, KFILND_MSG_VERSION,
+                                      msg->proto.hello.version);
+               CDEBUG(D_NET,
+                      "Peer %s(%p):0x%llx version: %u; local version %u; negotiated version: %u\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
+                      msg->proto.hello.version, KFILND_MSG_VERSION,
+                      kp->kp_version);
+       } else {
+               kp->kp_version = msg->proto.hello.version;
+               CDEBUG(D_NET,"Peer %s(%p):0x%llx negotiated version: %u\n",
+                      libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
+                      msg->proto.hello.version);
+       }
+
+       /* Clear kp_hello_pending if we've received the hello response,
+        * otherwise this is an incoming hello request and we may have our
+        * own hello request to this peer still outstanding
+        */
+       if (msg->type == KFILND_MSG_HELLO_RSP)
+               kfilnd_peer_clear_hello_pending(kp);
+}
index 4e18e95..91cdc1b 100644 (file)
 void kfilnd_peer_down(struct kfilnd_peer *kp);
 void kfilnd_peer_put(struct kfilnd_peer *kp);
 struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid);
-void kfilnd_peer_update_rx_contexts(struct kfilnd_peer *kp,
-                                   unsigned int rx_base,
-                                   unsigned int rx_count);
 void kfilnd_peer_alive(struct kfilnd_peer *kp);
 void kfilnd_peer_destroy(struct kfilnd_dev *dev);
 void kfilnd_peer_init(struct kfilnd_dev *dev);
 kfi_addr_t kfilnd_peer_get_kfi_addr(struct kfilnd_peer *kp);
 u16 kfilnd_peer_target_rx_base(struct kfilnd_peer *kp);
+void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg);
 
 #endif /* _KFILND_PEER_ */
index e36fd61..36bc689 100644 (file)
@@ -681,6 +681,8 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
                                        "Failed to post send to %s(%#llx): rc=%d",
                                        libcfs_nid2str(tn->tn_kp->kp_nid),
                                        tn->tn_target_addr, rc);
+                       if (event == TN_EVENT_TX_HELLO)
+                               kfilnd_peer_clear_hello_pending(tn->tn_kp);
                        kfilnd_tn_status_update(tn, rc,
                                                LNET_MSG_STATUS_LOCAL_ERROR);
                }
@@ -718,23 +720,21 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
                break;
 
        case TN_EVENT_RX_OK:
-               /* If TN_EVENT_RX_OK occurs on a new peer, this is a sign of a
-                * peer having a stale peer structure. Stale peer structures
-                * requires dropping the incoming message and initiating a hello
-                * handshake.
-                */
-               if (kfilnd_peer_is_new_peer(tn->tn_kp)) {
+               if (kfilnd_peer_needs_hello(tn->tn_kp)) {
                        rc = kfilnd_send_hello_request(tn->tn_ep->end_dev,
                                                       tn->tn_ep->end_cpt,
-                                                      tn->tn_kp->kp_nid);
+                                                      tn->tn_kp);
                        if (rc)
                                KFILND_TN_ERROR(tn,
                                                "Failed to send hello request: rc=%d",
                                                rc);
+                       rc = 0;
+               }
 
-                       /* Need to drop this message since it is uses stale
-                        * peer.
-                        */
+               /* If this is a new peer then we cannot progress the transaction
+                * and must drop it
+                */
+               if (kfilnd_peer_is_new_peer(tn->tn_kp)) {
                        KFILND_TN_ERROR(tn,
                                        "Dropping message from %s due to stale peer",
                                        libcfs_nid2str(tn->tn_kp->kp_nid));
@@ -787,24 +787,7 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
 
                switch (msg->type) {
                case KFILND_MSG_HELLO_REQ:
-                       kfilnd_peer_update_rx_contexts(tn->tn_kp,
-                                                      msg->proto.hello.rx_base,
-                                                      msg->proto.hello.rx_count);
-                       kfilnd_peer_set_remote_session_key(tn->tn_kp,
-                                                          msg->proto.hello.session_key);
-
-                       /* Negotiate kfilnd version used between peers. Fallback
-                        * to the minimum implemented kfilnd version.
-                        */
-                       kfilnd_peer_set_version(tn->tn_kp,
-                                               min_t(__u16, KFILND_MSG_VERSION,
-                                                   msg->proto.hello.version));
-                       KFILND_TN_DEBUG(tn,
-                                       "Peer kfilnd version: %u; Local kfilnd version: %u; Negotiated kfilnd verions: %u",
-                                       msg->proto.hello.version,
-                                       KFILND_MSG_VERSION,
-                                       tn->tn_kp->kp_version);
-
+                       kfilnd_peer_process_hello(tn->tn_kp, msg);
                        tn->tn_target_addr = kfilnd_peer_get_kfi_addr(tn->tn_kp);
                        KFILND_TN_DEBUG(tn, "Using peer %s(%#llx)",
                                        libcfs_nid2str(tn->tn_kp->kp_nid),
@@ -837,15 +820,7 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn,
 
                case KFILND_MSG_HELLO_RSP:
                        rc = 0;
-                       kfilnd_peer_update_rx_contexts(tn->tn_kp,
-                                                      msg->proto.hello.rx_base,
-                                                      msg->proto.hello.rx_count);
-                       kfilnd_peer_set_remote_session_key(tn->tn_kp,
-                                                          msg->proto.hello.session_key);
-                       kfilnd_peer_set_version(tn->tn_kp,
-                                               msg->proto.hello.version);
-                       KFILND_TN_DEBUG(tn, "Negotiated kfilnd version: %u",
-                                       msg->proto.hello.version);
+                       kfilnd_peer_process_hello(tn->tn_kp, msg);
                        finalize = true;
                        break;
 
@@ -889,6 +864,8 @@ static int kfilnd_tn_state_imm_send(struct kfilnd_transaction *tn,
 
                kfilnd_tn_status_update(tn, status, hstatus);
                kfilnd_peer_down(tn->tn_kp);
+               if (tn->msg_type == KFILND_MSG_HELLO_REQ)
+                       kfilnd_peer_clear_hello_pending(tn->tn_kp);
                break;
 
        case TN_EVENT_TX_OK:
@@ -1425,6 +1402,45 @@ struct kfilnd_transaction *kfilnd_tn_alloc(struct kfilnd_dev *dev, int cpt,
                                           bool key)
 {
        struct kfilnd_transaction *tn;
+       struct kfilnd_peer *kp;
+       int rc;
+
+       if (!dev) {
+               rc = -EINVAL;
+               goto err;
+       }
+
+       kp = kfilnd_peer_get(dev, target_nid);
+       if (IS_ERR(kp)) {
+               rc = PTR_ERR(kp);
+               goto err;
+       }
+
+       tn = kfilnd_tn_alloc_for_peer(dev, cpt, kp, alloc_msg, is_initiator,
+                                     key);
+       if (IS_ERR(tn)) {
+               rc = PTR_ERR(tn);
+               kfilnd_peer_put(kp);
+               goto err;
+       }
+
+       return tn;
+
+err:
+       return ERR_PTR(rc);
+}
+
+/* See kfilnd_tn_alloc()
+ * Note: Caller must have a reference on @kp
+ */
+struct kfilnd_transaction *kfilnd_tn_alloc_for_peer(struct kfilnd_dev *dev,
+                                                   int cpt,
+                                                   struct kfilnd_peer *kp,
+                                                   bool alloc_msg,
+                                                   bool is_initiator,
+                                                   bool key)
+{
+       struct kfilnd_transaction *tn;
        struct kfilnd_ep *ep;
        int rc;
        ktime_t tn_alloc_ts;
@@ -1467,11 +1483,8 @@ struct kfilnd_transaction *kfilnd_tn_alloc(struct kfilnd_dev *dev, int cpt,
                tn->tn_mr_key = rc;
        }
 
-       tn->tn_kp = kfilnd_peer_get(dev, target_nid);
-       if (IS_ERR(tn->tn_kp)) {
-               rc = PTR_ERR(tn->tn_kp);
-               goto err_put_mr_key;
-       }
+       tn->tn_kp = kp;
+       refcount_inc(&kp->kp_cnt);
 
        mutex_init(&tn->tn_lock);
        tn->tn_ep = ep;
@@ -1496,9 +1509,6 @@ struct kfilnd_transaction *kfilnd_tn_alloc(struct kfilnd_dev *dev, int cpt,
 
        return tn;
 
-err_put_mr_key:
-       if (key)
-               kfilnd_ep_put_key(ep, tn->tn_mr_key);
 err_free_tn:
        if (tn->tn_tx_msg.msg)
                kmem_cache_free(imm_buf_cache, tn->tn_tx_msg.msg);
index 237ac68..2f460a7 100644 (file)
@@ -40,6 +40,12 @@ struct kfilnd_transaction *kfilnd_tn_alloc(struct kfilnd_dev *dev, int cpt,
                                           lnet_nid_t target_nid,
                                           bool alloc_msg, bool is_initiator,
                                           bool key);
+struct kfilnd_transaction *kfilnd_tn_alloc_for_peer(struct kfilnd_dev *dev,
+                                                   int cpt,
+                                                   struct kfilnd_peer *kp,
+                                                   bool alloc_msg,
+                                                   bool is_initiator,
+                                                   bool key);
 void kfilnd_tn_event_handler(struct kfilnd_transaction *tn,
                             enum tn_events event, int status);
 void kfilnd_tn_cleanup(void);