chance to generate adaptive timeout data. */
#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/2)
#endif
+/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
+#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
+ INITIAL_CONNECT_TIMEOUT)
#define LONG_UNLINK 300 /* Unlink should happen before now */
/**
class_disconnect_stale_exports(obd, connect_done,
exp_flags_from_obd(obd) |
OBD_OPT_ABORT_RECOV);
+ /**
+ * If recovery proceeds with versions, some clients may time out
+ * while waiting for others and then try to reconnect.
+ * Extend the recovery timer to cover such reconnect cases.
+ */
+ if (obd->obd_version_recov)
+ reset_recovery_timer(obd, RECONNECT_DELAY_MAX * 2, 1);
}
/* next stage: replay requests */
/* The third stage: reply on final pings */
CDEBUG(D_INFO, "3: final stage - process recovery completion pings\n");
+ /** evict exports that failed VBR */
+ class_disconnect_stale_exports(obd, req_vbr_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
/** Update server last boot epoch */
lut_boot_epoch_update(lut);
/* We drop recoverying flag to forward all new requests
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
}
- /* evict exports failed VBR */
- class_disconnect_stale_exports(obd, req_vbr_done,
- exp_flags_from_obd(obd) |
- OBD_OPT_ABORT_RECOV);
delta = (jiffies - delta) / HZ;
CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
LASSERT(atomic_read(&obd->obd_lock_replay_clients) == 0);
- if (delta > obd_timeout * 2) {
+ if (delta > obd_timeout * OBD_RECOVERY_FACTOR) {
CWARN("too long recovery - read logs\n");
libcfs_debug_dumplog();
}
(req->rq_type != PTL_RPC_MSG_ERR) &&
(req->rq_reqmsg != NULL) &&
!(lustre_msg_get_flags(req->rq_reqmsg) &
- (MSG_RESENT | MSG_REPLAY | MSG_LAST_REPLAY))) {
+ (MSG_RESENT | MSG_REPLAY |
+ MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
/* early replies, errors and recovery requests don't count
* toward our service time estimate */
int oldse = at_add(&svc->srv_at_estimate, service_time);
*/
list_for_each_entry(req, &client_list, rq_list) {
LASSERT(!req->rq_export->exp_delayed);
- lut_client_epoch_update(&env, lut, req->rq_export);
+ if (!req->rq_export->exp_vbr_failed)
+ lut_client_epoch_update(&env, lut, req->rq_export);
}
/** return list back at once */
spin_lock_bh(&lut->lut_obd->obd_processing_task_lock);