Whamcloud - gitweb
* Print portals error string in ptllnd warnings/errors
author eeb <eeb>
Wed, 23 May 2007 11:25:46 +0000 (11:25 +0000)
committer eeb <eeb>
Wed, 23 May 2007 11:25:46 +0000 (11:25 +0000)
lnet/klnds/ptllnd/ptllnd.c
lnet/klnds/ptllnd/ptllnd_cb.c
lnet/klnds/ptllnd/ptllnd_peer.c
lnet/klnds/ptllnd/ptllnd_rx_buf.c
lnet/ulnds/ptllnd/ptllnd.c
lnet/ulnds/ptllnd/ptllnd_cb.c

index f9361f9..fd707f3 100755 (executable)
@@ -532,7 +532,8 @@ kptllnd_startup (lnet_ni_t *ni)
          * Which is ok.
          */
         if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) {
          * Which is ok.
          */
         if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) {
-                CERROR ("PtlNIInit: error %d\n", ptl_rc);
+                CERROR ("PtlNIInit: error %s(%d)\n",
+                        kptllnd_errtype2str(ptl_rc), ptl_rc);
                 rc = -EINVAL;
                 goto failed;
         }
                 rc = -EINVAL;
                 goto failed;
         }
@@ -543,7 +544,8 @@ kptllnd_startup (lnet_ni_t *ni)
                             kptllnd_eq_callback,     /* handler callback */
                             &kptllnd_data.kptl_eqh); /* output handle */
         if (ptl_rc != PTL_OK) {
                             kptllnd_eq_callback,     /* handler callback */
                             &kptllnd_data.kptl_eqh); /* output handle */
         if (ptl_rc != PTL_OK) {
-                CERROR("PtlEQAlloc failed %d\n", ptl_rc);
+                CERROR("PtlEQAlloc failed %s(%d)\n",
+                       kptllnd_errtype2str(ptl_rc), ptl_rc);
                 rc = -ENOMEM;
                 goto failed;
         }
                 rc = -ENOMEM;
                 goto failed;
         }
@@ -554,7 +556,8 @@ kptllnd_startup (lnet_ni_t *ni)
         ptl_rc = PtlGetId(kptllnd_data.kptl_nih,
                           &kptllnd_data.kptl_portals_id);
         if (ptl_rc != PTL_OK) {
         ptl_rc = PtlGetId(kptllnd_data.kptl_nih,
                           &kptllnd_data.kptl_portals_id);
         if (ptl_rc != PTL_OK) {
-                CERROR ("PtlGetID: error %d\n", ptl_rc);
+                CERROR ("PtlGetID: error %s(%d)\n",
+                        kptllnd_errtype2str(ptl_rc), ptl_rc);
                 rc = -EINVAL;
                 goto failed;
         }
                 rc = -EINVAL;
                 goto failed;
         }
@@ -818,13 +821,15 @@ kptllnd_shutdown (lnet_ni_t *ni)
         if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) {
                 prc = PtlEQFree(kptllnd_data.kptl_eqh);
                 if (prc != PTL_OK)
         if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) {
                 prc = PtlEQFree(kptllnd_data.kptl_eqh);
                 if (prc != PTL_OK)
-                        CERROR("Error %d freeing portals EQ\n", prc);
+                        CERROR("Error %s(%d) freeing portals EQ\n",
+                               kptllnd_errtype2str(prc), prc);
         }
 
         if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) {
                 prc = PtlNIFini(kptllnd_data.kptl_nih);
                 if (prc != PTL_OK)
         }
 
         if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) {
                 prc = PtlNIFini(kptllnd_data.kptl_nih);
                 if (prc != PTL_OK)
-                        CERROR("Error %d finalizing portals NI\n", prc);
+                        CERROR("Error %s(%d) finalizing portals NI\n",
+                               kptllnd_errtype2str(prc), prc);
         }
         
         LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0);
         }
         
         LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0);
index 75344e1..d734ba8 100644 (file)
@@ -227,8 +227,9 @@ kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
         ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, 
                           PTL_UNLINK, &mdh);
         if (ptlrc != PTL_OK) {
         ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, 
                           PTL_UNLINK, &mdh);
         if (ptlrc != PTL_OK) {
-                CERROR("PtlMDBind(%s) failed: %d\n",
-                       libcfs_id2str(peer->peer_id), ptlrc);
+                CERROR("PtlMDBind(%s) failed: %s(%d)\n",
+                       libcfs_id2str(peer->peer_id),
+                       kptllnd_errtype2str(ptlrc), ptlrc);
                 tx->tx_status = -EIO;
                 kptllnd_tx_decref(tx);
                 return -EIO;
                 tx->tx_status = -EIO;
                 kptllnd_tx_decref(tx);
                 return -EIO;
@@ -271,8 +272,9 @@ kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                                0);                    /* offset */
 
         if (ptlrc != PTL_OK) {
                                0);                    /* offset */
 
         if (ptlrc != PTL_OK) {
-                CERROR("Ptl%s failed: %d\n", 
-                       (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get", ptlrc);
+                CERROR("Ptl%s failed: %s(%d)\n", 
+                       (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
+                       kptllnd_errtype2str(ptlrc), ptlrc);
                 
                 kptllnd_peer_close(peer, -EIO);
                 /* Everything (including this RDMA) queued on the peer will
                 
                 kptllnd_peer_close(peer, -EIO);
                 /* Everything (including this RDMA) queued on the peer will
index 77b7191..71329e2 100644 (file)
@@ -503,8 +503,9 @@ kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
 
         prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh);
         if (prc != PTL_OK) {
 
         prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh);
         if (prc != PTL_OK) {
-                CERROR("PtlMDBind(%s) failed: %d\n",
-                       libcfs_id2str(peer->peer_id), prc);
+                CERROR("PtlMDBind(%s) failed: %s(%d)\n",
+                       libcfs_id2str(peer->peer_id),
+                       kptllnd_errtype2str(prc), prc);
                 tx->tx_status = -EIO;
                 kptllnd_tx_decref(tx);
                 return;
                 tx->tx_status = -EIO;
                 kptllnd_tx_decref(tx);
                 return;
@@ -664,16 +665,18 @@ kptllnd_peer_check_sends (kptl_peer_t *peer)
                                          PTL_INS_BEFORE,
                                          &meh);
                         if (rc != PTL_OK) {
                                          PTL_INS_BEFORE,
                                          &meh);
                         if (rc != PTL_OK) {
-                                CERROR("PtlMEAttach(%s) failed: %d\n",
-                                       libcfs_id2str(peer->peer_id), rc);
+                                CERROR("PtlMEAttach(%s) failed: %s(%d)\n",
+                                       libcfs_id2str(peer->peer_id),
+                                       kptllnd_errtype2str(rc), rc);
                                 goto failed;
                         }
 
                         rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK,
                                          &tx->tx_rdma_mdh);
                         if (rc != PTL_OK) {
                                 goto failed;
                         }
 
                         rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK,
                                          &tx->tx_rdma_mdh);
                         if (rc != PTL_OK) {
-                                CERROR("PtlMDAttach(%s) failed: %d\n",
-                                       libcfs_id2str(tx->tx_peer->peer_id), rc);
+                                CERROR("PtlMDAttach(%s) failed: %s(%d)\n",
+                                       libcfs_id2str(tx->tx_peer->peer_id),
+                                       kptllnd_errtype2str(rc), rc);
                                 rc = PtlMEUnlink(meh);
                                 LASSERT(rc == PTL_OK);
                                 tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
                                 rc = PtlMEUnlink(meh);
                                 LASSERT(rc == PTL_OK);
                                 tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
@@ -696,8 +699,9 @@ kptllnd_peer_check_sends (kptl_peer_t *peer)
                              0,                 /* offset */
                              0);                /* header data */
                 if (rc != PTL_OK) {
                              0,                 /* offset */
                              0);                /* header data */
                 if (rc != PTL_OK) {
-                        CERROR("PtlPut %s error %d\n",
-                               libcfs_id2str(peer->peer_id), rc);
+                        CERROR("PtlPut %s error %s(%d)\n",
+                               libcfs_id2str(peer->peer_id),
+                               kptllnd_errtype2str(rc), rc);
                         goto failed;
                 }
 
                         goto failed;
                 }
 
index e897086..ce21e95 100644 (file)
@@ -268,7 +268,8 @@ kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb)
                          PTL_INS_AFTER,
                          &meh);
         if (rc != PTL_OK) {
                          PTL_INS_AFTER,
                          &meh);
         if (rc != PTL_OK) {
-                CERROR("PtlMeAttach rxb failed %d\n", rc);
+                CERROR("PtlMeAttach rxb failed %s(%d)\n",
+                       kptllnd_errtype2str(rc), rc);
                 goto failed;
         }
 
                 goto failed;
         }
 
@@ -296,7 +297,8 @@ kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb)
                 return;
         }
         
                 return;
         }
         
-        CERROR("PtlMDAttach rxb failed %d\n", rc);
+        CERROR("PtlMDAttach rxb failed %s(%d)\n",
+               kptllnd_errtype2str(rc), rc);
         rc = PtlMEUnlink(meh);
         LASSERT(rc == PTL_OK);
 
         rc = PtlMEUnlink(meh);
         LASSERT(rc == PTL_OK);
 
@@ -395,16 +397,15 @@ kptllnd_rx_buffer_callback (ptl_event_t *ev)
         LASSERT (ev->type == PTL_EVENT_UNLINK ||
                  ev->match_bits == LNET_MSG_MATCHBITS);
 
         LASSERT (ev->type == PTL_EVENT_UNLINK ||
                  ev->match_bits == LNET_MSG_MATCHBITS);
 
-        if (ev->ni_fail_type != PTL_NI_OK)
+        if (ev->ni_fail_type != PTL_NI_OK) {
                 CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn",
                        kptllnd_ptlid2str(ev->initiator),
                        kptllnd_evtype2str(ev->type), ev->type, rxb,
                        kptllnd_errtype2str(ev->ni_fail_type),
                        ev->ni_fail_type, unlinked);
 
                 CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn",
                        kptllnd_ptlid2str(ev->initiator),
                        kptllnd_evtype2str(ev->type), ev->type, rxb,
                        kptllnd_errtype2str(ev->ni_fail_type),
                        ev->ni_fail_type, unlinked);
 
-        if (ev->type == PTL_EVENT_PUT_END &&
-            ev->ni_fail_type == PTL_NI_OK &&
-            !rxbp->rxbp_shutdown) {
+        } else if (ev->type == PTL_EVENT_PUT_END &&
+                   !rxbp->rxbp_shutdown) {
 
                 /* rxbp_shutdown sampled without locking!  I only treat it as a
                  * hint since shutdown can start while rx's are queued on
 
                 /* rxbp_shutdown sampled without locking!  I only treat it as a
                  * hint since shutdown can start while rx's are queued on
@@ -494,8 +495,9 @@ kptllnd_nak (kptl_rx_t *rx)
 
         rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh);
         if (rc != PTL_OK) {
 
         rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh);
         if (rc != PTL_OK) {
-                CWARN("Can't NAK %s: bind failed %d\n",
-                      kptllnd_ptlid2str(rx->rx_initiator), rc);
+                CWARN("Can't NAK %s: bind failed %s(%d)\n",
+                      kptllnd_ptlid2str(rx->rx_initiator),
+                      kptllnd_errtype2str(rc), rc);
                 return;
         }
 
                 return;
         }
 
@@ -504,8 +506,9 @@ kptllnd_nak (kptl_rx_t *rx)
                     LNET_MSG_MATCHBITS, 0, 0);
 
         if (rc != PTL_OK)
                     LNET_MSG_MATCHBITS, 0, 0);
 
         if (rc != PTL_OK)
-                CWARN("Can't NAK %s: put failed %d\n",
-                      kptllnd_ptlid2str(rx->rx_initiator), rc);
+                CWARN("Can't NAK %s: put failed %s(%d)\n",
+                      kptllnd_ptlid2str(rx->rx_initiator),
+                      kptllnd_errtype2str(rc), rc);
 }
 
 void
 }
 
 void
@@ -548,9 +551,11 @@ kptllnd_rx_parse(kptl_rx_t *rx)
         srcid.nid = msg->ptlm_srcnid;
         srcid.pid = msg->ptlm_srcpid;
 
         srcid.nid = msg->ptlm_srcnid;
         srcid.pid = msg->ptlm_srcpid;
 
-        CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks\n",
+        CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n",
                libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type),
                libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type),
-               msg->ptlm_credits, rx, rx->rx_rxb, jiffies - rx->rx_treceived);
+               msg->ptlm_credits, rx, rx->rx_rxb, 
+               jiffies - rx->rx_treceived,
+               cfs_duration_sec(jiffies - rx->rx_treceived));
 
         if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) {
                 CERROR("Bad source id %s from %s\n",
 
         if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) {
                 CERROR("Bad source id %s from %s\n",
index a3d06f2..ef882a1 100644 (file)
@@ -691,7 +691,8 @@ ptllnd_startup (lnet_ni_t *ni)
         rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid,
                        NULL, NULL, &plni->plni_nih);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
         rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid,
                        NULL, NULL, &plni->plni_nih);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                CERROR("PtlNIInit failed: %d\n", rc);
+                CERROR("PtlNIInit failed: %s(%d)\n",
+                      ptllnd_errtype2str(rc), rc);
                 rc = -ENODEV;
                 goto failed2;
         }
                 rc = -ENODEV;
                 goto failed2;
         }
@@ -699,7 +700,8 @@ ptllnd_startup (lnet_ni_t *ni)
         rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size,
                         PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
         if (rc != PTL_OK) {
         rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size,
                         PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
         if (rc != PTL_OK) {
-                CERROR("PtlEQAlloc failed: %d\n", rc);
+                CERROR("PtlEQAlloc failed: %s(%d)\n",
+                      ptllnd_errtype2str(rc), rc);
                 rc = -ENODEV;
                 goto failed3;
         }
                 rc = -ENODEV;
                 goto failed3;
         }
@@ -707,8 +709,10 @@ ptllnd_startup (lnet_ni_t *ni)
         /*
          * Fetch the Portals NID
          */
         /*
          * Fetch the Portals NID
          */
-        if(rc != PtlGetId(plni->plni_nih,&plni->plni_portals_id)){
-                CERROR ("PtlGetID failed : %d\n", rc);
+       rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
+        if (rc != PTL_OK) {
+                CERROR ("PtlGetID failed : %s(%d)\n",
+                       ptllnd_errtype2str(rc), rc);
                 rc = -EINVAL;
                 goto failed4;
         }
                 rc = -EINVAL;
                 goto failed4;
         }
index ec6170f..d2bab07 100644 (file)
@@ -713,7 +713,8 @@ ptllnd_post_buffer(ptllnd_buffer_t *buf)
                          anyid, LNET_MSG_MATCHBITS, 0,
                          PTL_UNLINK, PTL_INS_AFTER, &meh);
         if (rc != PTL_OK) {
                          anyid, LNET_MSG_MATCHBITS, 0,
                          PTL_UNLINK, PTL_INS_AFTER, &meh);
         if (rc != PTL_OK) {
-                CERROR("PtlMEAttach failed: %d\n", rc);
+                CERROR("PtlMEAttach failed: %s(%d)\n",
+                       ptllnd_errtype2str(rc), rc);
                 return -ENOMEM;
         }
 
                 return -ENOMEM;
         }
 
@@ -724,7 +725,8 @@ ptllnd_post_buffer(ptllnd_buffer_t *buf)
         if (rc == PTL_OK)
                 return 0;
 
         if (rc == PTL_OK)
                 return 0;
 
-        CERROR("PtlMDAttach failed: %d\n", rc);
+        CERROR("PtlMDAttach failed: %s(%d)\n",
+               ptllnd_errtype2str(rc), rc);
 
         buf->plb_posted = 0;
         plni->plni_nposted_buffers--;
 
         buf->plb_posted = 0;
         plni->plni_nposted_buffers--;
@@ -843,8 +845,9 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
 
                 rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh);
                 if (rc != PTL_OK) {
 
                 rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh);
                 if (rc != PTL_OK) {
-                        CERROR("PtlMDBind for %s failed: %d\n",
-                               libcfs_id2str(peer->plp_id), rc);
+                        CERROR("PtlMDBind for %s failed: %s(%d)\n",
+                               libcfs_id2str(peer->plp_id),
+                               ptllnd_errtype2str(rc), rc);
                         tx->tx_status = -EIO;
                         ptllnd_tx_done(tx);
                         break;
                         tx->tx_status = -EIO;
                         ptllnd_tx_done(tx);
                         break;
@@ -869,8 +872,9 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                 rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid,
                             plni->plni_portal, 0, LNET_MSG_MATCHBITS, 0, 0);
                 if (rc != PTL_OK) {
                 rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid,
                             plni->plni_portal, 0, LNET_MSG_MATCHBITS, 0, 0);
                 if (rc != PTL_OK) {
-                        CERROR("PtlPut for %s failed: %d\n",
-                               libcfs_id2str(peer->plp_id), rc);
+                        CERROR("PtlPut for %s failed: %s(%d)\n",
+                               libcfs_id2str(peer->plp_id),
+                               ptllnd_errtype2str(rc), rc);
                         tx->tx_status = -EIO;
                         ptllnd_tx_done(tx);
                         break;
                         tx->tx_status = -EIO;
                         ptllnd_tx_done(tx);
                         break;
@@ -950,8 +954,9 @@ ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg,
         rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid,
                          matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh);
         if (rc != PTL_OK) {
         rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid,
                          matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh);
         if (rc != PTL_OK) {
-                CERROR("PtlMEAttach for %s failed: %d\n",
-                       libcfs_id2str(peer->plp_id), rc);
+                CERROR("PtlMEAttach for %s failed: %s(%d)\n",
+                       libcfs_id2str(peer->plp_id),
+                       ptllnd_errtype2str(rc), rc);
                 rc = -EIO;
                 goto failed;
         }
                 rc = -EIO;
                 goto failed;
         }
@@ -960,8 +965,9 @@ ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg,
 
         rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh);
         if (rc != PTL_OK) {
 
         rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh);
         if (rc != PTL_OK) {
-                CERROR("PtlMDAttach for %s failed: %d\n",
-                       libcfs_id2str(peer->plp_id), rc);
+                CERROR("PtlMDAttach for %s failed: %s(%d)\n",
+                       libcfs_id2str(peer->plp_id),
+                       ptllnd_errtype2str(rc), rc);
                 rc2 = PtlMEUnlink(meh);
                 LASSERT (rc2 == PTL_OK);
                 rc = -EIO;
                 rc2 = PtlMEUnlink(meh);
                 LASSERT (rc2 == PTL_OK);
                 rc = -EIO;
@@ -1051,8 +1057,9 @@ ptllnd_active_rdma(ptllnd_peer_t *peer, int type,
 
         rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh);
         if (rc != PTL_OK) {
 
         rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh);
         if (rc != PTL_OK) {
-                CERROR("PtlMDBind for %s failed: %d\n",
-                       libcfs_id2str(peer->plp_id), rc);
+                CERROR("PtlMDBind for %s failed: %s(%d)\n",
+                       libcfs_id2str(peer->plp_id),
+                       ptllnd_errtype2str(rc), rc);
                 rc = -EIO;
                 goto failed;
         }
                 rc = -EIO;
                 goto failed;
         }
@@ -1075,8 +1082,9 @@ ptllnd_active_rdma(ptllnd_peer_t *peer, int type,
         if (rc == PTL_OK)
                 return 0;
 
         if (rc == PTL_OK)
                 return 0;
 
-        CERROR("Can't initiate RDMA with %s: %d\n",
-               libcfs_id2str(peer->plp_id), rc);
+        CERROR("Can't initiate RDMA with %s: %s(%d)\n",
+               libcfs_id2str(peer->plp_id),
+               ptllnd_errtype2str(rc), rc);
 
         tx->tx_lnetmsg = NULL;
  failed:
 
         tx->tx_lnetmsg = NULL;
  failed: