From 368689640e6a42a152eee73d354dd6f68836065b Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Thu, 24 Jun 2010 14:02:33 +0800 Subject: [PATCH] b=21636 diagnostic patch to detect request delay send/reply diagnostic patch to detect request delay send/reply i=robert.read i=pravin.shelar --- lustre/include/lustre_net.h | 2 ++ lustre/ptlrpc/client.c | 20 +++++++++++++------- lustre/ptlrpc/events.c | 1 + 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 1b3d57d..1a380dc 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -666,6 +666,8 @@ struct ptlrpc_request { * when request/reply sent (secs), or time when request should be sent */ time_t rq_sent; + /** time for request really sent out */ + time_t rq_real_sent; /** when request must finish. volatile * so that servers' early reply updates to the deadline aren't diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index a844650..52b2f28 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1740,14 +1740,20 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) req->rq_timedout = 1; cfs_spin_unlock(&req->rq_lock); - DEBUG_REQ(req->rq_fake ? D_INFO : D_WARNING, req, - "Request x"LPU64" sent from %s to NID %s "CFS_DURATION_T"s " - "ago has %s ("CFS_DURATION_T"s prior to deadline).\n", + DEBUG_REQ(req->rq_fake ? D_INFO : D_WARNING, req, "Request x"LPU64 + " sent from %s to NID %s has %s: [sent "CFS_DURATION_T"] " + "[real_sent "CFS_DURATION_T"] [current "CFS_DURATION_T"] " + "[deadline "CFS_DURATION_T"s] [delay "CFS_DURATION_T"s]", req->rq_xid, imp ? imp->imp_obd->obd_name : "", - imp ? libcfs_nid2str(imp->imp_connection->c_peer.nid) : "", - cfs_time_sub(cfs_time_current_sec(), req->rq_sent), - req->rq_net_err ? "failed due to network error" : "timed out", - cfs_time_sub(req->rq_deadline, req->rq_sent)); + imp ? libcfs_nid2str(imp->imp_connection->c_peer.nid) : "", + req->rq_net_err ? "failed due to network error" : + ((req->rq_real_sent == 0 || + cfs_time_before(req->rq_real_sent, req->rq_sent) || + cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ? + "timed out for sent delay" : "timed out for slow reply"), + req->rq_sent, req->rq_real_sent, cfs_time_current_sec(), + cfs_time_sub(req->rq_deadline, req->rq_sent), + cfs_time_sub(cfs_time_current_sec(), req->rq_deadline)); if (imp != NULL && obd_debug_peer_on_timeout) LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 8441c6c..0ce8ca8 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -69,6 +69,7 @@ void request_out_callback(lnet_event_t *ev) "type %d, status %d", ev->type, ev->status); sptlrpc_request_out_callback(req); + req->rq_real_sent = cfs_time_current_sec(); if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { -- 1.8.3.1