Whamcloud - gitweb
b=14425
[fs/lustre-release.git] / lnet / klnds / ptllnd / ptllnd_rx_buf.c
index 364540b..356660c 100644 (file)
@@ -268,7 +268,8 @@ kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb)
                          PTL_INS_AFTER,
                          &meh);
         if (rc != PTL_OK) {
-                CERROR("PtlMeAttach rxb failed %d\n", rc);
+                CERROR("PtlMeAttach rxb failed %s(%d)\n",
+                       kptllnd_errtype2str(rc), rc);
                 goto failed;
         }
 
@@ -296,7 +297,8 @@ kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb)
                 return;
         }
         
-        CERROR("PtlMDAttach rxb failed %d\n", rc);
+        CERROR("PtlMDAttach rxb failed %s(%d)\n",
+               kptllnd_errtype2str(rc), rc);
         rc = PtlMEUnlink(meh);
         LASSERT(rc == PTL_OK);
 
@@ -329,12 +331,15 @@ kptllnd_rx_alloc(void)
 }
 
 void
-kptllnd_rx_done(kptl_rx_t *rx)
+kptllnd_rx_done(kptl_rx_t *rx, int post_credit)
 {
         kptl_rx_buffer_t *rxb = rx->rx_rxb;
         kptl_peer_t      *peer = rx->rx_peer;
         unsigned long     flags;
 
+        LASSERT (post_credit == PTLLND_POSTRX_NO_CREDIT ||
+                 post_credit == PTLLND_POSTRX_PEER_CREDIT);
+
         CDEBUG(D_NET, "rx=%p rxb %p peer %p\n", rx, rxb, peer);
 
         if (rxb != NULL)
@@ -344,7 +349,9 @@ kptllnd_rx_done(kptl_rx_t *rx)
                 /* Update credits (after I've decref-ed the buffer) */
                 spin_lock_irqsave(&peer->peer_lock, flags);
 
-                peer->peer_outstanding_credits++;
+                if (post_credit == PTLLND_POSTRX_PEER_CREDIT)
+                        peer->peer_outstanding_credits++;
+
                 LASSERT (peer->peer_outstanding_credits +
                          peer->peer_sent_credits <=
                          *kptllnd_tunables.kptl_peercredits);
@@ -395,16 +402,15 @@ kptllnd_rx_buffer_callback (ptl_event_t *ev)
         LASSERT (ev->type == PTL_EVENT_UNLINK ||
                  ev->match_bits == LNET_MSG_MATCHBITS);
 
-        if (ev->ni_fail_type != PTL_NI_OK)
+        if (ev->ni_fail_type != PTL_NI_OK) {
                 CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn",
                        kptllnd_ptlid2str(ev->initiator),
                        kptllnd_evtype2str(ev->type), ev->type, rxb,
                        kptllnd_errtype2str(ev->ni_fail_type),
                        ev->ni_fail_type, unlinked);
 
-        if (ev->type == PTL_EVENT_PUT_END &&
-            ev->ni_fail_type == PTL_NI_OK &&
-            !rxbp->rxbp_shutdown) {
+        } else if (ev->type == PTL_EVENT_PUT_END &&
+                   !rxbp->rxbp_shutdown) {
 
                 /* rxbp_shutdown sampled without locking!  I only treat it as a
                  * hint since shutdown can start while rx's are queued on
@@ -414,10 +420,10 @@ kptllnd_rx_buffer_callback (ptl_event_t *ev)
                  * odd-length message will misalign subsequent messages and
                  * force the fixup below...  */
                 if ((ev->mlength & 7) != 0)
-                        CWARN("Message from %s has odd length %d: "
+                        CWARN("Message from %s has odd length "LPU64": "
                               "probable version incompatibility\n",
                               kptllnd_ptlid2str(ev->initiator),
-                              ev->mlength);
+                              (__u64)ev->mlength);
 #endif
                 rx = kptllnd_rx_alloc();
                 if (rx == NULL) {
@@ -494,8 +500,9 @@ kptllnd_nak (kptl_rx_t *rx)
 
         rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh);
         if (rc != PTL_OK) {
-                CWARN("Can't NAK %s: bind failed %d\n",
-                      kptllnd_ptlid2str(rx->rx_initiator), rc);
+                CWARN("Can't NAK %s: bind failed %s(%d)\n",
+                      kptllnd_ptlid2str(rx->rx_initiator),
+                      kptllnd_errtype2str(rc), rc);
                 return;
         }
 
@@ -504,14 +511,16 @@ kptllnd_nak (kptl_rx_t *rx)
                     LNET_MSG_MATCHBITS, 0, 0);
 
         if (rc != PTL_OK)
-                CWARN("Can't NAK %s: put failed %d\n",
-                      kptllnd_ptlid2str(rx->rx_initiator), rc);
+                CWARN("Can't NAK %s: put failed %s(%d)\n",
+                      kptllnd_ptlid2str(rx->rx_initiator),
+                      kptllnd_errtype2str(rc), rc);
 }
 
 void
 kptllnd_rx_parse(kptl_rx_t *rx)
 {
         kptl_msg_t             *msg = rx->rx_msg;
+        int                     post_credit = PTLLND_POSTRX_PEER_CREDIT;
         kptl_peer_t            *peer;
         int                     rc;
         unsigned long           flags;
@@ -548,9 +557,11 @@ kptllnd_rx_parse(kptl_rx_t *rx)
         srcid.nid = msg->ptlm_srcnid;
         srcid.pid = msg->ptlm_srcpid;
 
-        CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks\n",
+        CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n",
                libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type),
-               msg->ptlm_credits, rx, rx->rx_rxb, jiffies - rx->rx_treceived);
+               msg->ptlm_credits, rx, rx->rx_rxb, 
+               jiffies - rx->rx_treceived,
+               cfs_duration_sec(jiffies - rx->rx_treceived));
 
         if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) {
                 CERROR("Bad source id %s from %s\n",
@@ -637,7 +648,7 @@ kptllnd_rx_parse(kptl_rx_t *rx)
                 int  c = peer->peer_credits;
                 int oc = peer->peer_outstanding_credits;
                 int sc = peer->peer_sent_credits;
-                
+
                 spin_unlock_irqrestore(&peer->peer_lock, flags);
 
                 CERROR("%s: buffer overrun [%d/%d+%d]\n",
@@ -650,6 +661,12 @@ kptllnd_rx_parse(kptl_rx_t *rx)
          * buffers after the startup handshake. */
         peer->peer_credits += msg->ptlm_credits;
 
+        /* This ensures the credit taken by NOOP can be returned */
+        if (msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) {
+                peer->peer_outstanding_credits++;
+                post_credit = PTLLND_POSTRX_NO_CREDIT;
+        }
+
         spin_unlock_irqrestore(&peer->peer_lock, flags);
 
         /* See if something can go out now that credits have come in */
@@ -718,5 +735,5 @@ kptllnd_rx_parse(kptl_rx_t *rx)
         if (rx->rx_peer == NULL)                /* drop ref on peer */
                 kptllnd_peer_decref(peer);      /* unless rx_done will */
  rx_done:
-        kptllnd_rx_done(rx);
+        kptllnd_rx_done(rx, post_credit);
 }