Whamcloud - gitweb
i=liang,b=17546:
authorisaac <isaac>
Tue, 16 Jun 2009 22:22:58 +0000 (22:22 +0000)
committerisaac <isaac>
Tue, 16 Jun 2009 22:22:58 +0000 (22:22 +0000)
- fixed kptllnd HELLO protocol deadlock.

lnet/ChangeLog
lnet/klnds/ptllnd/ptllnd.h
lnet/klnds/ptllnd/ptllnd_peer.c
lnet/klnds/ptllnd/ptllnd_rx_buf.c

index af2618c..0d739c9 100644 (file)
@@ -18,6 +18,11 @@ Description:
 Details    : 
 
 Severity   : normal
 Details    : 
 
 Severity   : normal
+Bugzilla   : 17546
+Description: kptllnd HELLO protocol deadlock
+Details    : kptllnd HELLO protocol doesn't run to completion in finite time
+
+Severity   : normal
 Bugzilla   : 18075
 Description: LNet selftest fixes and enhancements
 
 Bugzilla   : 18075
 Description: LNet selftest fixes and enhancements
 
index 58a87b0..5f817cb 100755 (executable)
@@ -542,6 +542,8 @@ kptllnd_peer_unreserve_buffers(void)
 int  kptllnd_setup_tx_descs(void);
 void kptllnd_cleanup_tx_descs(void);
 void kptllnd_tx_fini(kptl_tx_t *tx);
 int  kptllnd_setup_tx_descs(void);
 void kptllnd_cleanup_tx_descs(void);
 void kptllnd_tx_fini(kptl_tx_t *tx);
+void kptllnd_cancel_txlist(struct list_head *peerq, struct list_head *txs);
+void kptllnd_restart_txs(lnet_process_id_t target, struct list_head *restarts);
 kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose);
 void kptllnd_tx_callback(ptl_event_t *ev);
 const char *kptllnd_tx_typestr(int type);
 kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose);
 void kptllnd_tx_callback(ptl_event_t *ev);
 const char *kptllnd_tx_typestr(int type);
index 2afede2..641a73c 100644 (file)
@@ -488,13 +488,32 @@ again:
 }
 
 void
 }
 
 void
+kptllnd_queue_tx(kptl_peer_t *peer, kptl_tx_t *tx)
+{
+        /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */
+        unsigned long flags;
+
+        spin_lock_irqsave(&peer->peer_lock, flags);
+
+        /* Ensure HELLO is sent first */
+        if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP)
+                list_add(&tx->tx_list, &peer->peer_noops);
+        else if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO)
+                list_add(&tx->tx_list, &peer->peer_sendq);
+        else
+                list_add_tail(&tx->tx_list, &peer->peer_sendq);
+
+        spin_unlock_irqrestore(&peer->peer_lock, flags);
+}
+
+
+void
 kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
 {
         /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */
         ptl_handle_md_t  msg_mdh;
         ptl_md_t         md;
         ptl_err_t        prc;
 kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
 {
         /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */
         ptl_handle_md_t  msg_mdh;
         ptl_md_t         md;
         ptl_err_t        prc;
-        unsigned long    flags;
 
         LASSERT (!tx->tx_idle);
         LASSERT (!tx->tx_active);
 
         LASSERT (!tx->tx_idle);
         LASSERT (!tx->tx_active);
@@ -537,21 +556,54 @@ kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
                 return;
         }
 
                 return;
         }
 
-        spin_lock_irqsave(&peer->peer_lock, flags);
 
         tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ);
         tx->tx_active = 1;
         tx->tx_msg_mdh = msg_mdh;
 
         tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ);
         tx->tx_active = 1;
         tx->tx_msg_mdh = msg_mdh;
+        kptllnd_queue_tx(peer, tx);
+}
 
 
-       /* Ensure HELLO is sent first */
-        if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP)
-               list_add(&tx->tx_list, &peer->peer_noops);
-       else if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO)
-               list_add(&tx->tx_list, &peer->peer_sendq);
-       else
-               list_add_tail(&tx->tx_list, &peer->peer_sendq);
+/* NB "restarts" comes from peer_sendq of a single peer */
+void
+kptllnd_restart_txs (lnet_process_id_t target, struct list_head *restarts)
+{
+        kptl_tx_t   *tx;
+        kptl_tx_t   *tmp;
+        kptl_peer_t *peer;
 
 
-        spin_unlock_irqrestore(&peer->peer_lock, flags);
+        LASSERT (!list_empty(restarts));
+
+        if (kptllnd_find_target(&peer, target) != 0)
+                peer = NULL;
+
+        list_for_each_entry_safe (tx, tmp, restarts, tx_list) {
+                LASSERT (tx->tx_peer != NULL);
+                LASSERT (tx->tx_type == TX_TYPE_GET_REQUEST ||
+                         tx->tx_type == TX_TYPE_PUT_REQUEST ||
+                         tx->tx_type == TX_TYPE_SMALL_MESSAGE);
+
+                list_del_init(&tx->tx_list);
+
+                if (peer == NULL ||
+                    tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
+                        kptllnd_tx_decref(tx);
+                        continue;
+                }
+
+                LASSERT (tx->tx_msg->ptlm_type != PTLLND_MSG_TYPE_NOOP);
+                tx->tx_status = 0;
+                tx->tx_active = 1;
+                kptllnd_peer_decref(tx->tx_peer);
+                tx->tx_peer = NULL;
+                kptllnd_set_tx_peer(tx, peer);
+                kptllnd_queue_tx(peer, tx); /* takes over my ref on tx */
+        }
+
+        if (peer == NULL)
+                return;
+
+        kptllnd_peer_check_sends(peer);
+        kptllnd_peer_decref(peer);
 }
 
 static inline int
 }
 
 static inline int
@@ -1276,6 +1328,7 @@ kptllnd_find_target(kptl_peer_t **peerp, lnet_process_id_t target)
                 return -ENOMEM;
         }
 
                 return -ENOMEM;
         }
 
+        hello_tx->tx_acked = 1;
         kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
                          sizeof(kptl_hello_msg_t));
 
         kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
                          sizeof(kptl_hello_msg_t));
 
index 149e726..90c654a 100644 (file)
@@ -544,14 +544,17 @@ void
 kptllnd_rx_parse(kptl_rx_t *rx)
 {
         kptl_msg_t             *msg = rx->rx_msg;
 kptllnd_rx_parse(kptl_rx_t *rx)
 {
         kptl_msg_t             *msg = rx->rx_msg;
+        int                     rc = 0;
         int                     post_credit = PTLLND_POSTRX_PEER_CREDIT;
         kptl_peer_t            *peer;
         int                     post_credit = PTLLND_POSTRX_PEER_CREDIT;
         kptl_peer_t            *peer;
-        int                     rc;
+        struct list_head        txs;
         unsigned long           flags;
         lnet_process_id_t       srcid;
 
         LASSERT (rx->rx_peer == NULL);
 
         unsigned long           flags;
         lnet_process_id_t       srcid;
 
         LASSERT (rx->rx_peer == NULL);
 
+        INIT_LIST_HEAD(&txs);
+
         if ((rx->rx_nob >= 4 &&
              (msg->ptlm_magic == LNET_PROTO_MAGIC ||
               msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) ||
         if ((rx->rx_nob >= 4 &&
              (msg->ptlm_magic == LNET_PROTO_MAGIC ||
               msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) ||
@@ -599,10 +602,19 @@ kptllnd_rx_parse(kptl_rx_t *rx)
                 if (peer == NULL)
                         goto rx_done;
                 
                 if (peer == NULL)
                         goto rx_done;
                 
-                CWARN("NAK from %s (%s)\n",
-                      libcfs_id2str(srcid),
+                CWARN("NAK from %s (%d:%s)\n",
+                      libcfs_id2str(srcid), peer->peer_state,
                       kptllnd_ptlid2str(rx->rx_initiator));
 
                       kptllnd_ptlid2str(rx->rx_initiator));
 
+                /* NB can't nuke new peer - bug 17546 comment 31 */
+                if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
+                        CDEBUG(D_NET, "Stale NAK from %s(%s): WAITING_HELLO\n",
+                               libcfs_id2str(srcid),
+                               kptllnd_ptlid2str(rx->rx_initiator));
+                        kptllnd_peer_decref(peer);
+                        goto rx_done;
+                }
+
                 rc = -EPROTO;
                 goto failed;
         }
                 rc = -EPROTO;
                 goto failed;
         }
@@ -627,36 +639,49 @@ kptllnd_rx_parse(kptl_rx_t *rx)
         } else {
                 peer = kptllnd_id2peer(srcid);
                 if (peer == NULL) {
         } else {
                 peer = kptllnd_id2peer(srcid);
                 if (peer == NULL) {
-                        CWARN("NAK %s: no connection; peer must reconnect\n",
+                        CWARN("NAK %s: no connection, %s must reconnect\n",
+                              kptllnd_msgtype2str(msg->ptlm_type),
                               libcfs_id2str(srcid));
                         /* NAK to make the peer reconnect */
                         kptllnd_nak(rx);
                         goto rx_done;
                 }
 
                               libcfs_id2str(srcid));
                         /* NAK to make the peer reconnect */
                         kptllnd_nak(rx);
                         goto rx_done;
                 }
 
-                /* Ignore anything apart from HELLO while I'm waiting for it and
-                 * any messages for a previous incarnation of the connection */
-                if (peer->peer_state == PEER_STATE_WAITING_HELLO ||
-                    msg->ptlm_dststamp < peer->peer_myincarnation) {
+                /* Ignore any messages for a previous incarnation of me */
+                if (msg->ptlm_dststamp < peer->peer_myincarnation) {
                         kptllnd_peer_decref(peer);
                         goto rx_done;
                 }
 
                         kptllnd_peer_decref(peer);
                         goto rx_done;
                 }
 
-                if (msg->ptlm_srcstamp != peer->peer_incarnation) {
-                        CERROR("%s: Unexpected srcstamp "LPX64" "
+                if (msg->ptlm_dststamp != peer->peer_myincarnation) {
+                        CERROR("%s: Unexpected dststamp "LPX64" "
                                "("LPX64" expected)\n",
                                "("LPX64" expected)\n",
-                               libcfs_id2str(peer->peer_id),
-                               msg->ptlm_srcstamp,
-                               peer->peer_incarnation);
+                               libcfs_id2str(peer->peer_id), msg->ptlm_dststamp,
+                               peer->peer_myincarnation);
                         rc = -EPROTO;
                         goto failed;
                 }
 
                         rc = -EPROTO;
                         goto failed;
                 }
 
-                if (msg->ptlm_dststamp != peer->peer_myincarnation) {
-                        CERROR("%s: Unexpected dststamp "LPX64" "
+                if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
+                        /* recoverable error - restart txs */
+                        spin_lock_irqsave(&peer->peer_lock, flags);
+                        kptllnd_cancel_txlist(&peer->peer_sendq, &txs);
+                        spin_unlock_irqrestore(&peer->peer_lock, flags);
+
+                        CWARN("NAK %s: Unexpected %s message\n",
+                              libcfs_id2str(srcid),
+                              kptllnd_msgtype2str(msg->ptlm_type));
+                        kptllnd_nak(rx);
+                        rc = -EPROTO;
+                        goto failed;
+                }
+
+                if (msg->ptlm_srcstamp != peer->peer_incarnation) {
+                        CERROR("%s: Unexpected srcstamp "LPX64" "
                                "("LPX64" expected)\n",
                                "("LPX64" expected)\n",
-                               libcfs_id2str(peer->peer_id), msg->ptlm_dststamp,
-                               peer->peer_myincarnation);
+                               libcfs_id2str(srcid),
+                               msg->ptlm_srcstamp,
+                               peer->peer_incarnation);
                         rc = -EPROTO;
                         goto failed;
                 }
                         rc = -EPROTO;
                         goto failed;
                 }
@@ -677,6 +702,7 @@ kptllnd_rx_parse(kptl_rx_t *rx)
 
                 CERROR("%s: buffer overrun [%d/%d+%d]\n",
                        libcfs_id2str(peer->peer_id), c, sc, oc);
 
                 CERROR("%s: buffer overrun [%d/%d+%d]\n",
                        libcfs_id2str(peer->peer_id), c, sc, oc);
+                rc = -EPROTO;
                 goto failed;
         }
         peer->peer_sent_credits--;
                 goto failed;
         }
         peer->peer_sent_credits--;
@@ -752,12 +778,15 @@ kptllnd_rx_parse(kptl_rx_t *rx)
                 if (rc >= 0)                    /* kptllnd_recv owns 'rx' now */
                         return;
                 goto failed;
                 if (rc >= 0)                    /* kptllnd_recv owns 'rx' now */
                         return;
                 goto failed;
-         }
+        }
 
  failed:
 
  failed:
+        LASSERT (rc != 0);
         kptllnd_peer_close(peer, rc);
         if (rx->rx_peer == NULL)                /* drop ref on peer */
                 kptllnd_peer_decref(peer);      /* unless rx_done will */
         kptllnd_peer_close(peer, rc);
         if (rx->rx_peer == NULL)                /* drop ref on peer */
                 kptllnd_peer_decref(peer);      /* unless rx_done will */
+        if (!list_empty(&txs))
+                kptllnd_restart_txs(srcid, &txs);
  rx_done:
         kptllnd_rx_done(rx, post_credit);
 }
  rx_done:
         kptllnd_rx_done(rx, post_credit);
 }