return wake_up;
}
+/**
+ * Wake-up condition for a thread sleeping on tdtd_recovery_threads_waitq.
+ *
+ * \param[in] lut	target whose lut_obd and lut_tdtd are examined
+ *
+ * \retval 1	obd_abort_recovery is set, or tdtd_recovery_threads_count
+ *		has dropped to zero
+ * \retval 0	neither condition holds
+ */
+static int check_update_llog(struct lu_target *lut)
+{
+	struct target_distribute_txn_data *txn_data = lut->lut_tdtd;
+	struct obd_device *dev = lut->lut_obd;
+	int wake_up = 0;
+
+	/* The abort flag is tested first; txn_data is only dereferenced
+	 * when recovery has not been aborted. */
+	if (dev->obd_abort_recovery) {
+		CDEBUG(D_HA, "waking for aborted recovery\n");
+		wake_up = 1;
+	} else if (atomic_read(&txn_data->tdtd_recovery_threads_count) == 0) {
+		CDEBUG(D_HA, "waking for completion of reading update log\n");
+		wake_up = 1;
+	}
+
+	return wake_up;
+}
+
/**
* wait for recovery events,
* check its status with help of check_routine
*/
if (next_update_transno == 0) {
spin_unlock(&obd->obd_recovery_task_lock);
- wait_event_idle(
+
+ while (wait_event_timeout(
tdtd->tdtd_recovery_threads_waitq,
- atomic_read(&tdtd->tdtd_recovery_threads_count)
- == 0);
+ check_update_llog(lut),
+ cfs_time_seconds(60)) == 0);
spin_lock(&obd->obd_recovery_task_lock);
next_update_transno =
- distribute_txn_get_next_transno(
- lut->lut_tdtd);
+ distribute_txn_get_next_transno(tdtd);
}
}
obd->obd_replayed_requests++;
}
+#define WATCHDOG_TIMEOUT (obd_timeout * 10) /* passed to ptlrpc_watchdog_init() around each replay handler call */
+
static void replay_request_or_update(struct lu_env *env,
struct lu_target *lut,
struct target_recovery_data *trd,
lustre_msg_get_transno(req->rq_reqmsg),
libcfs_nid2str(req->rq_peer.nid));
+ ptlrpc_watchdog_init(&thread->t_watchdog,
+ WATCHDOG_TIMEOUT);
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
+ ptlrpc_watchdog_disable(&thread->t_watchdog);
+
/**
* bz18031: increase next_recovery_transno before
* target_request_copy_put() will drop exp_rpc reference
LASSERT(tdtd != NULL);
dtrq = distribute_txn_get_next_req(tdtd);
lu_context_enter(&thread->t_env->le_ctx);
+ ptlrpc_watchdog_init(&thread->t_watchdog,
+ WATCHDOG_TIMEOUT);
rc = tdtd->tdtd_replay_handler(env, tdtd, dtrq);
+ ptlrpc_watchdog_disable(&thread->t_watchdog);
lu_context_exit(&thread->t_env->le_ctx);
extend_recovery_timer(obd, obd_timeout, true);