From e8fe6f0cfb8c5428c0eabce5ac81295d5ab26b88 Mon Sep 17 00:00:00 2001 From: yangsheng Date: Thu, 14 Jan 2010 21:55:18 +0800 Subject: [PATCH] b=21411 Improvement for AT. i=nathan i=tappro --- lustre/include/lustre_import.h | 2 +- lustre/ldlm/ldlm_lib.c | 4 ++-- lustre/ldlm/ldlm_lockd.c | 4 ++-- lustre/ldlm/ldlm_request.c | 4 ++-- lustre/ptlrpc/client.c | 4 ++-- lustre/ptlrpc/import.c | 9 +++++---- lustre/ptlrpc/niobuf.c | 2 +- lustre/ptlrpc/service.c | 41 ++++++++++++++++++++++++----------------- 8 files changed, 39 insertions(+), 31 deletions(-) diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index f1e3799..e5c71dc 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -215,7 +215,7 @@ static inline void at_init(struct adaptive_timeout *at, int val, int flags) { static inline int at_get(struct adaptive_timeout *at) { return at->at_current; } -int at_add(struct adaptive_timeout *at, unsigned int val); +int at_measured(struct adaptive_timeout *at, unsigned int val); int import_at_get_index(struct obd_import *imp, int portal); extern unsigned int at_max; #define AT_OFF (at_max == 0) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 5ffa243..34fcdaa 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1469,8 +1469,8 @@ target_start_and_reset_recovery_timer(struct obd_device *obd, if (!new_client && service_time) /* Teach server about old server's estimates, as first guess at how long new requests will take. */ - at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate, - service_time); + at_measured(&req->rq_rqbd->rqbd_service->srv_at_estimate, + service_time); check_and_start_recovery_timer(obd, handler); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 530fb92..d7c13e6 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -841,8 +841,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) /* Server-side enqueue wait time estimate, used in __ldlm_add_waiting_lock to set future enqueue timers */ if (total_enqueue_wait < ldlm_get_enq_timeout(lock)) - at_add(&lock->l_resource->lr_namespace->ns_at_estimate, - total_enqueue_wait); + at_measured(&lock->l_resource->lr_namespace->ns_at_estimate, + total_enqueue_wait); else /* bz18618. Don't add lock enqueue time we spend waiting for a previous callback to fail. Locks waiting legitimately will diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 81cb983..58ccbce 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -202,8 +202,8 @@ noreproc: cfs_time_current_sec() - lock->l_last_activity); /* Update our time estimate */ - at_add(&lock->l_resource->lr_namespace->ns_at_estimate, - cfs_time_current_sec() - lock->l_last_activity); + at_measured(&lock->l_resource->lr_namespace->ns_at_estimate, + cfs_time_current_sec() - lock->l_last_activity); RETURN(0); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 7320c0b..1ddb24a 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -230,7 +230,7 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req, idx = import_at_get_index(req->rq_import, req->rq_request_portal); /* max service estimates are tracked on the server side, so just keep minimal history here */ - oldse = at_add(&at->iat_service_estimate[idx], serv_est); + oldse = at_measured(&at->iat_service_estimate[idx], serv_est); if (oldse != 0) CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d " "has changed from %d to %d\n", @@ -261,7 +261,7 @@ static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req, CWARN("Reported service time %u > total measured time %ld\n", service_time, now - req->rq_sent); - oldnl = at_add(&at->iat_net_latency, nl); + oldnl = at_measured(&at->iat_net_latency, nl); if (oldnl != 0) CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) " "has changed from %d to %d\n", diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 58802a7..06e4c3c 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -513,9 +513,10 @@ static int import_select_connection(struct obd_import *imp) !imp->imp_recon_bk /* not retrying */) { if (at_get(&imp->imp_at.iat_net_latency) < CONNECTION_SWITCH_MAX) { - at_add(&imp->imp_at.iat_net_latency, - MIN(at_get(&imp->imp_at.iat_net_latency) + - CONNECTION_SWITCH_INC, CONNECTION_SWITCH_MAX)); + at_measured(&imp->imp_at.iat_net_latency, + MIN(at_get(&imp->imp_at.iat_net_latency) + + CONNECTION_SWITCH_INC, + CONNECTION_SWITCH_MAX)); } LASSERT(imp_conn->oic_last_attempt); CWARN("%s: tried all connections, increasing latency to %ds\n", @@ -1489,7 +1490,7 @@ extern unsigned int at_min, at_max, at_history; This gives us a max of the last binlimit*AT_BINS secs without the storage, but still smoothing out a return to normalcy from a slow response. (E.g. remember the maximum latency in each minute of the last 4 minutes.) */ -int at_add(struct adaptive_timeout *at, unsigned int val) +int at_measured(struct adaptive_timeout *at, unsigned int val) { unsigned int old = at->at_current; time_t now = cfs_time_current_sec(); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index a205d65..a908044 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -390,7 +390,7 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags) (MSG_RESENT | MSG_REPLAY | MSG_LAST_REPLAY))) { /* early replies, errors and recovery requests don't count * toward our service time estimate */ - int oldse = at_add(&svc->srv_at_estimate, service_time); + int oldse = at_measured(&svc->srv_at_estimate, service_time); if (oldse != 0) DEBUG_REQ(D_ADAPTTO, req, "svc %s changed estimate from %d to %d", diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 3608d06..778b376 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -751,27 +751,34 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) if (req->rq_export && lustre_msg_get_flags(req->rq_reqmsg) & (MSG_REPLAY | MSG_LAST_REPLAY)) { - /* Use at_extra as early reply period for recovery requests - * but keep it not longer than recovery time / 4 */ - at_add(&svc->srv_at_estimate, - min(at_extra, - req->rq_export->exp_obd->obd_recovery_timeout / 4)); - newdl = cfs_time_current_sec(); + /* During recovery, we don't want to send too many early + * replies, but on the other hand we want to make sure the + * client has enough time to resend if the rpc is lost. So + * during the recovery period send at least 4 early replies, + * spacing them every at_extra if we can. at_estimate should + * always equal this fixed value during recovery. */ + at_measured(&svc->srv_at_estimate, min(at_extra, + req->rq_export->exp_obd->obd_recovery_timeout / 4)); } else { /* Fake our processing time into the future to ask the * clients for some extra amount of time */ - at_add(&svc->srv_at_estimate, at_extra); - newdl = req->rq_arrival_time.tv_sec; - } - newdl += at_get(&svc->srv_at_estimate); - if (req->rq_deadline >= newdl) { - /* We're not adding any time, no need to send an early reply - (e.g. maybe at adaptive_max) */ - DEBUG_REQ(D_WARNING, req, "Couldn't add any time " - "(%ld/%ld), not sending early reply\n", - olddl, newdl - cfs_time_current_sec()); - RETURN(-ETIMEDOUT); + at_measured(&svc->srv_at_estimate, at_extra + + cfs_time_current_sec() - + req->rq_arrival_time.tv_sec); + + /* Check to see if we've actually increased the deadline - + * we may be past adaptive_max */ + if (req->rq_deadline >= req->rq_arrival_time.tv_sec + + at_get(&svc->srv_at_estimate)) { + DEBUG_REQ(D_WARNING, req, "Couldn't add any time " + "(%ld/%ld), not sending early reply\n", + olddl, req->rq_arrival_time.tv_sec + + at_get(&svc->srv_at_estimate) - + cfs_time_current_sec()); + RETURN(-ETIMEDOUT); + } } + newdl = cfs_time_current_sec() + at_get(&svc->srv_at_estimate); OBD_ALLOC(reqcopy, sizeof *reqcopy); if (reqcopy == NULL) -- 1.8.3.1