void *event_arg);
/* import.c */
+/*
+ * Turn a service-time estimate into a request timeout by padding the
+ * estimate with a margin of one quarter of its value plus 5.
+ */
+static inline unsigned int at_est2timeout(unsigned int val)
+{
+        unsigned int margin = (val >> 2) + 5;
+
+        /* arbitrary minimum: 125% of the estimate + 5 sec */
+        return val + margin;
+}
+
+/*
+ * Recover a service-time estimate from a timeout value.
+ *
+ * The timeout is consumed in whole steps of 5, each step being worth 4
+ * units of estimate, so the result is always a multiple of 4.  For a
+ * timeout built by at_est2timeout() this yields the original estimate
+ * rounded up to the next multiple of 4.  \a val is asserted non-zero.
+ */
+static inline unsigned int at_timeout2est(unsigned int val)
+{
+        unsigned int steps;
+
+        LASSERT(val);
+        steps = (val - 1) / 5;
+        return steps * 4;
+}
+
static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
memset(at, 0, sizeof(*at));
at->at_current = val;
struct ptlrpc_request *req,
int new_client)
{
- int req_timeout = OBD_RECOVERY_FACTOR *
- lustre_msg_get_timeout(req->rq_reqmsg);
+ int req_timeout = lustre_msg_get_timeout(req->rq_reqmsg);
+
+ /* teach server about old server's estimates */
+ if (!new_client)
+ at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate,
+ at_timeout2est(req_timeout));
check_and_start_recovery_timer(obd);
+ req_timeout *= OBD_RECOVERY_FACTOR;
if (req_timeout > obd->obd_recovery_timeout && !new_client)
reset_recovery_timer(obd, req_timeout, 0);
}
idx = import_at_get_index(req->rq_import,
req->rq_request_portal);
serv_est = at_get(&at->iat_service_estimate[idx]);
- /* add an arbitrary minimum: 125% +5 sec */
- req->rq_timeout = serv_est + (serv_est >> 2) + 5;
+ req->rq_timeout = at_est2timeout(serv_est);
/* We could get even fancier here, using history to predict increased
loading... */
unsigned int oldse;
struct imp_at *at;
+ /* only update the estimate when not in recovery */
+ if (!(req->rq_send_state & (LUSTRE_IMP_FULL | LUSTRE_IMP_CONNECTING)))
+ return;
+
LASSERT(req->rq_import);
at = &req->rq_import->imp_at;
req->rq_arrival_time.tv_sec, 1);
if (!(flags & PTLRPC_REPLY_EARLY) &&
- (req->rq_type != PTL_RPC_MSG_ERR)) {
- /* early replies and errors don't count toward our service
- time estimate */
+ (req->rq_type != PTL_RPC_MSG_ERR) &&
+ !(lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_RESENT | MSG_REPLAY | MSG_LAST_REPLAY))) {
+ /* early replies, errors and recovery requests don't count
+ * toward our service time estimate */
int oldse = at_add(&svc->srv_at_estimate, service_time);
if (oldse != 0)
DEBUG_REQ(D_ADAPTTO, req,
RETURN(-ENOSYS);
}
- if (extra_time) {
- /* Fake our processing time into the future to ask the
- clients for some extra amount of time */
- extra_time += cfs_time_current_sec() -
- req->rq_arrival_time.tv_sec;
- at_add(&svc->srv_at_estimate, extra_time);
+ if (req->rq_export && req->rq_export->exp_in_recovery) {
+ /* don't increase server estimates during recovery, and give
+ clients the full recovery time. */
+ newdl = cfs_time_current_sec() +
+ req->rq_export->exp_obd->obd_recovery_timeout;
+ } else {
+ if (extra_time) {
+ /* Fake our processing time into the future to ask the
+ clients for some extra amount of time */
+ extra_time += cfs_time_current_sec() -
+ req->rq_arrival_time.tv_sec;
+ at_add(&svc->srv_at_estimate, extra_time);
+ }
+ newdl = req->rq_arrival_time.tv_sec +
+ at_get(&svc->srv_at_estimate);
}
-
- newdl = req->rq_arrival_time.tv_sec + at_get(&svc->srv_at_estimate);
if (req->rq_deadline >= newdl) {
/* We're not adding any time, no need to send an early reply
(e.g. maybe at adaptive_max) */
}
run_test 19 "resend of open request"
+test_20() { #16389
+ BEFORE=`date +%s`
+ replay_barrier $SINGLEMDS
+ touch $MOUNT1/a
+ touch $MOUNT2/b
+ umount $MOUNT2
+ facet_failover $SINGLEMDS
+ df $MOUNT1 || return 1
+ rm $MOUNT1/a
+ zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
+ TIER1=$((`date +%s` - BEFORE))
+ BEFORE=`date +%s`
+ replay_barrier $SINGLEMDS
+ touch $MOUNT1/a
+ touch $MOUNT2/b
+ umount $MOUNT2
+ facet_failover $SINGLEMDS
+ df $MOUNT1 || return 1
+ rm $MOUNT1/a
+ zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
+ TIER2=$((`date +%s` - BEFORE))
+ [ $TIER2 -ge $((TIER1 * 2)) ] && \
+ error "recovery time is growing $TIER2 > $TIER1"
+ return 0
+}
+run_test 20 "recovery time is not increasing"
+
equals_msg `basename $0`: test complete, cleaning up
SLEEP=$((`date +%s` - $NOW))
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP