From 38eaca10a3238e9dc923bacd2a3dd10d0a6eb417 Mon Sep 17 00:00:00 2001 From: tappro Date: Tue, 9 Sep 2008 06:05:07 +0000 Subject: [PATCH] - fix slow increasing of recovery time b:16389 i:nathan, rread --- lustre/ldlm/ldlm_lib.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 2bf4467..9466996 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1196,7 +1196,7 @@ void target_stop_recovery(void *data, int abort) "(%d clients did)\n", obd->obd_name, obd->obd_recoverable_clients, obd->obd_connected_clients); - class_disconnect_stale_exports(obd); + class_disconnect_stale_exports(obd); } abort_recovery_queue(obd); target_finish_recovery(obd); @@ -1321,12 +1321,18 @@ target_start_and_reset_recovery_timer(struct obd_device *obd, struct ptlrpc_request *req, int new_client) { + int req_timeout = lustre_msg_get_timeout(req->rq_reqmsg); + /* teach server about old server's estimates */ if (!new_client) at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate, - lustre_msg_get_timeout(req->rq_reqmsg)); + at_timeout2est(req_timeout)); check_and_start_recovery_timer(obd, handler); + + req_timeout *= OBD_RECOVERY_FACTOR; + if (req_timeout > obd->obd_recovery_timeout && !new_client) + reset_recovery_timer(obd, req_timeout, 0); } static int check_for_next_transno(struct obd_device *obd) @@ -1647,7 +1653,7 @@ int target_queue_last_replay_reply(struct ptlrpc_request *req, int rc) if (!delayed_done) - target_finish_recovery(obd); + target_finish_recovery(obd); CDEBUG(D_HA, "%s: recovery complete\n", obd_uuid2str(&obd->obd_uuid)); } else { -- 1.8.3.1