Whamcloud - gitweb
b=21636 diagnostic patch to detect request delay send/reply
authorFan Yong <Yong.Fan@sun.com>
Thu, 24 Jun 2010 06:02:33 +0000 (14:02 +0800)
committerRobert Read <robert.read@oracle.com>
Thu, 24 Jun 2010 06:19:05 +0000 (23:19 -0700)
diagnostic patch to detect request delay send/reply

i=robert.read
i=pravin.shelar

lustre/include/lustre_net.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c

index 1b3d57d..1a380dc 100644 (file)
@@ -666,6 +666,8 @@ struct ptlrpc_request {
          * when request/reply sent (secs), or time when request should be sent
          */
         time_t rq_sent;
+        /** time for request really sent out */
+        time_t rq_real_sent;
 
         /** when request must finish. volatile
          * so that servers' early reply updates to the deadline aren't
index a844650..52b2f28 100644 (file)
@@ -1740,14 +1740,20 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
         req->rq_timedout = 1;
         cfs_spin_unlock(&req->rq_lock);
 
-        DEBUG_REQ(req->rq_fake ? D_INFO : D_WARNING, req, 
-                  "Request x"LPU64" sent from %s to NID %s "CFS_DURATION_T"s "
-                  "ago has %s ("CFS_DURATION_T"s prior to deadline).\n",
+        DEBUG_REQ(req->rq_fake ? D_INFO : D_WARNING, req, "Request x"LPU64
+                  " sent from %s to NID %s has %s: [sent "CFS_DURATION_T"] "
+                  "[real_sent "CFS_DURATION_T"] [current "CFS_DURATION_T"] "
+                  "[deadline "CFS_DURATION_T"s] [delay "CFS_DURATION_T"s]",
                   req->rq_xid, imp ? imp->imp_obd->obd_name : "<?>",
-                  imp ? libcfs_nid2str(imp->imp_connection->c_peer.nid) : "<?>",
-                  cfs_time_sub(cfs_time_current_sec(), req->rq_sent),
-                  req->rq_net_err ? "failed due to network error" : "timed out",
-                  cfs_time_sub(req->rq_deadline, req->rq_sent));
+                  imp ? libcfs_nid2str(imp->imp_connection->c_peer.nid) : "<?>", 
+                  req->rq_net_err ? "failed due to network error" :
+                     ((req->rq_real_sent == 0 ||
+                       cfs_time_before(req->rq_real_sent, req->rq_sent) ||
+                       cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ?
+                      "timed out for sent delay" : "timed out for slow reply"),
+                  req->rq_sent, req->rq_real_sent, cfs_time_current_sec(),
+                  cfs_time_sub(req->rq_deadline, req->rq_sent),
+                  cfs_time_sub(cfs_time_current_sec(), req->rq_deadline));
 
         if (imp != NULL && obd_debug_peer_on_timeout)
                 LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
index 8441c6c..0ce8ca8 100644 (file)
@@ -69,6 +69,7 @@ void request_out_callback(lnet_event_t *ev)
                   "type %d, status %d", ev->type, ev->status);
 
         sptlrpc_request_out_callback(req);
+        req->rq_real_sent = cfs_time_current_sec();
 
         if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {