Whamcloud - gitweb
* Print portals error string in ptllnd warnings/errors
[fs/lustre-release.git] / lnet / klnds / ptllnd / ptllnd.c
index a82babe..fd707f3 100755 (executable)
@@ -166,6 +166,50 @@ const char *kptllnd_msgtype2str(int type)
 #undef DO_TYPE
 }
 
+/* Map a portals return code to its symbolic name, e.g. "PTL_OK" */
+const char *kptllnd_errtype2str(int type)
+{
+#define DO_TYPE(x) case x: return #x;   /* stringify the constant's name */
+        switch(type) {
+                DO_TYPE(PTL_OK);
+                DO_TYPE(PTL_SEGV);
+                DO_TYPE(PTL_NO_SPACE);
+                DO_TYPE(PTL_ME_IN_USE);
+                DO_TYPE(PTL_NAL_FAILED);
+                DO_TYPE(PTL_NO_INIT);
+                DO_TYPE(PTL_IFACE_DUP);
+                DO_TYPE(PTL_IFACE_INVALID);
+                DO_TYPE(PTL_HANDLE_INVALID);
+                DO_TYPE(PTL_MD_INVALID);
+                DO_TYPE(PTL_ME_INVALID);
+                DO_TYPE(PTL_PROCESS_INVALID);
+                DO_TYPE(PTL_PT_INDEX_INVALID);
+                DO_TYPE(PTL_SR_INDEX_INVALID);
+                DO_TYPE(PTL_EQ_INVALID);
+                DO_TYPE(PTL_EQ_DROPPED);
+                DO_TYPE(PTL_EQ_EMPTY);
+                DO_TYPE(PTL_MD_NO_UPDATE);
+                DO_TYPE(PTL_FAIL);
+                DO_TYPE(PTL_AC_INDEX_INVALID);
+                DO_TYPE(PTL_MD_ILLEGAL);
+                DO_TYPE(PTL_ME_LIST_TOO_LONG);
+                DO_TYPE(PTL_MD_IN_USE);
+                DO_TYPE(PTL_NI_INVALID);
+                DO_TYPE(PTL_PID_INVALID);
+                DO_TYPE(PTL_PT_FULL);
+                DO_TYPE(PTL_VAL_FAILED);
+                DO_TYPE(PTL_NOT_IMPLEMENTED);
+                DO_TYPE(PTL_NO_ACK);
+                DO_TYPE(PTL_EQ_IN_USE);
+                DO_TYPE(PTL_PID_IN_USE);
+                DO_TYPE(PTL_INV_EQ_SIZE);
+                DO_TYPE(PTL_AGAIN);
+        default:        /* not one of the codes listed above */
+                return "<unknown error type>";
+        }
+#undef DO_TYPE
+}
+
 __u32
 kptllnd_cksum (void *ptr, int nob)
 {
@@ -198,7 +242,7 @@ kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer)
         /* msg->ptlm_nob   Filled in kptllnd_init_msg()  */
         msg->ptlm_cksum    = 0;
         msg->ptlm_srcnid   = kptllnd_data.kptl_ni->ni_nid;
-        msg->ptlm_srcstamp = kptllnd_data.kptl_incarnation;
+        msg->ptlm_srcstamp = peer->peer_myincarnation;
         msg->ptlm_dstnid   = peer->peer_id.nid;
         msg->ptlm_dststamp = peer->peer_incarnation;
         msg->ptlm_srcpid   = the_lnet.ln_pid;
@@ -432,9 +476,12 @@ kptllnd_startup (lnet_ni_t *ni)
         }
 
         *kptllnd_tunables.kptl_max_msg_size &= ~7;
-        if (*kptllnd_tunables.kptl_max_msg_size < sizeof(kptl_msg_t))
-                *kptllnd_tunables.kptl_max_msg_size =
-                        (sizeof(kptl_msg_t) + 7) & ~7;
+        if (*kptllnd_tunables.kptl_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
+                *kptllnd_tunables.kptl_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
+
+        CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
+        CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
+
         /*
          * zero pointers, flags etc
          * put everything into a known state.
@@ -485,7 +532,8 @@ kptllnd_startup (lnet_ni_t *ni)
          * Which is ok.
          */
         if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) {
-                CERROR ("PtlNIInit: error %d\n", ptl_rc);
+                CERROR ("PtlNIInit: error %s(%d)\n",
+                        kptllnd_errtype2str(ptl_rc), ptl_rc);
                 rc = -EINVAL;
                 goto failed;
         }
@@ -496,7 +544,8 @@ kptllnd_startup (lnet_ni_t *ni)
                             kptllnd_eq_callback,     /* handler callback */
                             &kptllnd_data.kptl_eqh); /* output handle */
         if (ptl_rc != PTL_OK) {
-                CERROR("PtlEQAlloc failed %d\n", ptl_rc);
+                CERROR("PtlEQAlloc failed %s(%d)\n",
+                       kptllnd_errtype2str(ptl_rc), ptl_rc);
                 rc = -ENOMEM;
                 goto failed;
         }
@@ -507,7 +556,8 @@ kptllnd_startup (lnet_ni_t *ni)
         ptl_rc = PtlGetId(kptllnd_data.kptl_nih,
                           &kptllnd_data.kptl_portals_id);
         if (ptl_rc != PTL_OK) {
-                CERROR ("PtlGetID: error %d\n", ptl_rc);
+                CERROR ("PtlGetID: error %s(%d)\n",
+                        kptllnd_errtype2str(ptl_rc), ptl_rc);
                 rc = -EINVAL;
                 goto failed;
         }
@@ -527,9 +577,9 @@ kptllnd_startup (lnet_ni_t *ni)
                kptllnd_ptlid2str(kptllnd_data.kptl_portals_id),
                libcfs_nid2str(ni->ni_nid));
 
-        /*
-         * Initialized the incarnation
-         */
+        /* Initialize the incarnation - it must be for-all-time unique, even
+         * accounting for the fact that we increment it when we disconnect a
+         * peer that's using it */
         do_gettimeofday(&tv);
         kptllnd_data.kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) +
                                         tv.tv_usec;
@@ -771,13 +821,15 @@ kptllnd_shutdown (lnet_ni_t *ni)
         if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) {
                 prc = PtlEQFree(kptllnd_data.kptl_eqh);
                 if (prc != PTL_OK)
-                        CERROR("Error %d freeing portals EQ\n", prc);
+                        CERROR("Error %s(%d) freeing portals EQ\n",
+                               kptllnd_errtype2str(prc), prc);
         }
 
         if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) {
                 prc = PtlNIFini(kptllnd_data.kptl_nih);
                 if (prc != PTL_OK)
-                        CERROR("Error %d finalizing portals NI\n", prc);
+                        CERROR("Error %s(%d) finalizing portals NI\n",
+                               kptllnd_errtype2str(prc), prc);
         }
         
         LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0);