/* Client may skip 1 ping; wait for 2.5 ping intervals before evicting */
#define PING_EVICT_TIMEOUT (PING_INTERVAL * 5 / 2)
#define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */
-#define CONNECTION_SWITCH_MIN 5 /* Connection switching rate limiter */
-#define CONNECTION_SWITCH_MAX 50 /* Max connect interval for nonresponsive
- servers; keep this within the recovery
- period */
+#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
+ /* Max connect interval for nonresponsive servers; ~50s to avoid building up
+ connect requests in the LND queues, but within obd_timeout so we don't
+ miss the recovery window */
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */
#ifndef CRAY_XT3
/* In general this should be low to have quick detection of a system
running on a backup server. (If it's too low, import_select_connection
will increase the timeout anyhow.) */
-#define INITIAL_CONNECT_TIMEOUT max_t(int,CONNECTION_SWITCH_MIN,obd_timeout/20)
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
#else
/* ...but for very large systems (e.g. CRAY) we need to keep the initial
connect timeout high (bz 10803), because they will nearly ALWAYS be doing the
connects for the first time (clients "reboot" after every process, so there is
no chance to generate adaptive timeout data). */
-#define INITIAL_CONNECT_TIMEOUT max_t(int,CONNECTION_SWITCH_MIN,obd_timeout/2)
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/2)
#endif
#define LONG_UNLINK 300 /* Unlink should happen before now */
}
static void reset_recovery_timer(struct obd_device *obd,
- struct ptlrpc_request *req, int first)
+ struct ptlrpc_request *req)
{
spin_lock_bh(&obd->obd_processing_task_lock);
if (!obd->obd_recovering) {
return;
}
/* Track the client's largest expected replay time */
- obd->obd_recovery_timeout =
- max((first ? (int)OBD_RECOVERY_TIMEOUT :
- obd->obd_recovery_timeout),
- (int)lustre_msg_get_timeout(req->rq_reqmsg));
+ if (lustre_msg_get_timeout(req->rq_reqmsg) > obd->obd_recovery_timeout)
+ obd->obd_recovery_timeout =
+ lustre_msg_get_timeout(req->rq_reqmsg);
+ LASSERT(obd->obd_recovery_timeout >= OBD_RECOVERY_TIMEOUT);
cfs_timer_arm(&obd->obd_recovery_timer,
cfs_time_shift(obd->obd_recovery_timeout));
spin_unlock_bh(&obd->obd_processing_task_lock);
cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd);
spin_unlock_bh(&obd->obd_processing_task_lock);
- reset_recovery_timer(obd, req, 1);
+ reset_recovery_timer(obd, req);
}
static int check_for_next_transno(struct obd_device *obd)
DEBUG_REQ(D_HA, req, "processing: ");
(void)obd->obd_recovery_handler(req);
obd->obd_replayed_requests++;
- reset_recovery_timer(obd, req, 0);
+ reset_recovery_timer(obd, req);
/* bug 1580: decide how to properly sync() in recovery */
//mds_fsync_super(obd->u.obt.obt_sb);
class_export_put(req->rq_export);
aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg);
req->rq_status = 0;
req->rq_interpret_reply = ptlrpc_replay_interpret;
+ /* Readjust the timeout for current conditions */
+ ptlrpc_at_set_req_timeout(req);
atomic_inc(&req->rq_import->imp_replay_inflight);
ptlrpc_request_addref(req); /* ptlrpcd needs a ref */
#ifndef CRAY_XT3
/* We want client umounts to happen quickly, no matter the
server state... */
- req->rq_timeout = min(req->rq_timeout, INITIAL_CONNECT_TIMEOUT);
+ req->rq_timeout = min_t(int, req->rq_timeout,
+ INITIAL_CONNECT_TIMEOUT);
#else
/* ... but we always want liblustre clients to nicely
disconnect, so only use the adaptive value. */