From e24b9245d449ae6f9acda3f3977d9c572a90afa2 Mon Sep 17 00:00:00 2001 From: yury Date: Thu, 6 Nov 2008 11:28:01 +0000 Subject: [PATCH] b=17310 Small add on patch for this bug. - quiet last fail_loc in ptlrpc_check_set() which is supposed to turn fail_loc off. We do not need it in log; - make sure that ptlrpc_set_next_timeout() yields 1s timeout (instead of 0s) for the set with rpcs in "unregistering" stage to prevent ptlrpcd from sleeping forever and hanging in test_45; - some comment cleanups in ptlrpc_set_next_timeout(); - in ptlrpc_unregister_reply() hit fail_loc and engage long unlink only for asyn == 1 which is connect in test_45. This makes sure that synchronous rpcs such as enqueue to mgs will not make test_45 longer than expected; - in ptlrpcd() make sure that we do not sleep on 0 timeout. --- lustre/ptlrpc/client.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 70c852b..bec9ff6 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1102,8 +1102,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) /* Turn fail_loc off to prevent it from looping * forever. */ - OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_UNLINK | - OBD_FAIL_ONCE); + OBD_FAIL_CHECK_QUIET(OBD_FAIL_PTLRPC_LONG_UNLINK | + OBD_FAIL_ONCE); /* Move to next phase if reply was successfully * unlinked. */ @@ -1467,18 +1467,26 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); /* request in-flight? */ - if (!(((req->rq_phase & (RQ_PHASE_RPC | - RQ_PHASE_UNREGISTERING)) && + if (!(((req->rq_phase & + (RQ_PHASE_RPC | RQ_PHASE_UNREGISTERING)) && !req->rq_waiting) || (req->rq_phase == RQ_PHASE_BULK) || (req->rq_phase == RQ_PHASE_NEW))) continue; - if (req->rq_timedout) /* already timed out */ + /* Check those waiting for long reply unlink every one + * second. */ + if (req->rq_phase == RQ_PHASE_NEW) { + timeout = 1; + break; + } + + /* Already timed out. */ + if (req->rq_timedout) continue; if (req->rq_phase == RQ_PHASE_NEW) - deadline = req->rq_sent; /* delayed send */ + deadline = req->rq_sent; /* delayed send */ else deadline = req->rq_deadline; @@ -1694,7 +1702,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_UNLINK) && - request->rq_reply_deadline == 0) + async && request->rq_reply_deadline == 0) request->rq_reply_deadline = cfs_time_current_sec()+LONG_UNLINK; /* Nothing left to do. */ -- 1.8.3.1