Whamcloud - gitweb
b=21678 Add more debug info to lnd_query code path
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd.c
index 6f4a475..c960578 100644 (file)
@@ -1551,6 +1551,8 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn)
                 LASSERT (tx->tx_msg.ksm_zc_cookies[0] != 0);
 
                 tx->tx_msg.ksm_zc_cookies[0] = 0;
+                if (tx->tx_resid == 0)
+                        tx->tx_resid = -1; /* mark it as not-acked */
                 cfs_list_del(&tx->tx_zc_list);
                 cfs_list_add(&tx->tx_zc_list, &zlist);
         }
@@ -1643,6 +1645,8 @@ ksocknal_queue_zombie_conn (ksock_conn_t *conn)
 void
 ksocknal_destroy_conn (ksock_conn_t *conn)
 {
+        cfs_time_t      last_rcv;
+
         /* Final coup-de-grace of the reaper */
         CDEBUG (D_NET, "connection %p\n", conn);
 
@@ -1657,10 +1661,16 @@ ksocknal_destroy_conn (ksock_conn_t *conn)
         /* complete current receive if any */
         switch (conn->ksnc_rx_state) {
         case SOCKNAL_RX_LNET_PAYLOAD:
-                CERROR("Completing partial receive from %s"
-                       ", ip %d.%d.%d.%d:%d, with error\n",
-                       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-                       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+                last_rcv = conn->ksnc_rx_deadline -
+                           cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
+                CERROR("Completing partial receive from %s[%d]"
+                       ", ip %d.%d.%d.%d:%d, with error, wanted: %d, left: %d, "
+                       "last alive is %ld secs ago\n",
+                       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
+                       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+                       conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
+                       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+                                        last_rcv)));
                 lnet_finalize (conn->ksnc_peer->ksnp_ni,
                                conn->ksnc_cookie, -EIO);
                 break;
@@ -1805,6 +1815,7 @@ ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
 {
         int                connect = 1;
         cfs_time_t         last_alive = 0;
+        cfs_time_t         now = cfs_time_current();
         ksock_peer_t      *peer = NULL;
         cfs_rwlock_t      *glock = &ksocknal_data.ksnd_global_lock;
         lnet_process_id_t  id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
@@ -1825,7 +1836,7 @@ ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
                                 /* something got ACKed */
                                 conn->ksnc_tx_deadline =
                                         cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
-                                peer->ksnp_last_alive = cfs_time_current();
+                                peer->ksnp_last_alive = now;
                                 conn->ksnc_tx_bufnob = bufnob;
                         }
                 }
@@ -1840,6 +1851,11 @@ ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
         if (last_alive != 0)
                 *when = last_alive;
 
+        CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
+               libcfs_nid2str(nid), peer,
+               last_alive ? cfs_duration_sec(now - last_alive) : -1,
+               connect);
+
         if (!connect)
                 return;