return transno;
}
-__u64 get_next_transno(struct lu_target *lut, int *type)
+
+static __u64 get_next_transno(struct lu_target *lut, int *type)
{
struct obd_device *obd = lut->lut_obd;
struct target_distribute_txn_data *tdtd = lut->lut_tdtd;
exp_req_replay_healthy)) {
abort_req_replay_queue(obd);
abort_lock_replay_queue(obd);
+ goto abort;
}
spin_lock(&obd->obd_recovery_task_lock);
obd->obd_replayed_requests++;
} else if (type == UPDATE_RECOVERY && transno != 0) {
struct distribute_txn_replay_req *dtrq;
- bool update_transno = false;
+ int rc;
spin_unlock(&obd->obd_recovery_task_lock);
LASSERT(tdtd != NULL);
dtrq = distribute_txn_get_next_req(tdtd);
lu_context_enter(&thread->t_env->le_ctx);
- tdtd->tdtd_replay_handler(env, tdtd, dtrq);
+ rc = tdtd->tdtd_replay_handler(env, tdtd, dtrq);
lu_context_exit(&thread->t_env->le_ctx);
extend_recovery_timer(obd, obd_timeout, true);
- /* Add it to the replay finish list */
- spin_lock(&tdtd->tdtd_replay_list_lock);
- if (dtrq->dtrq_xid != 0) {
+ if (rc == 0 && dtrq->dtrq_xid != 0) {
CDEBUG(D_HA, "Move x"LPU64" t"LPU64
" to finish list\n", dtrq->dtrq_xid,
dtrq->dtrq_master_transno);
+
+ /* Add it to the replay finish list */
+ spin_lock(&tdtd->tdtd_replay_list_lock);
list_add(&dtrq->dtrq_list,
&tdtd->tdtd_replay_finish_list);
- update_transno = true;
- } else {
- dtrq_destroy(dtrq);
- }
- spin_unlock(&tdtd->tdtd_replay_list_lock);
+ spin_unlock(&tdtd->tdtd_replay_list_lock);
- if (update_transno) {
spin_lock(&obd->obd_recovery_task_lock);
if (transno == obd->obd_next_recovery_transno)
obd->obd_next_recovery_transno++;
obd->obd_next_recovery_transno =
transno + 1;
spin_unlock(&obd->obd_recovery_task_lock);
+ } else {
+ dtrq_destroy(dtrq);
+ /* If update recovery fails, abort the
+ * recovery; otherwise it might cause
+ * both llog and filesystem corruption */
+ if (rc < 0)
+ obd->obd_force_abort_recovery = 1;
}
} else {
spin_unlock(&obd->obd_recovery_task_lock);
+abort:
LASSERT(list_empty(&obd->obd_req_replay_queue));
LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
/** evict exports failed VBR */
DEBUG_REQ(D_ERROR, req, "status %d, old was %d",
lustre_msg_get_status(req->rq_repmsg),
aa->praa_old_status);
+
+ /* Note: If the replay fails for MDT-MDT recovery, free
+ * the requests on the replay and committed lists and fail
+ * those on the delayed and sending lists with -EIO, since
+ * MDT-MDT update requests depend on each other (LU-7039) */
+ if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) {
+ struct ptlrpc_request *free_req;
+ struct ptlrpc_request *tmp;
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_entry_safe(free_req, tmp,
+ &imp->imp_replay_list,
+ rq_replay_list) {
+ ptlrpc_free_request(free_req);
+ }
+
+ list_for_each_entry_safe(free_req, tmp,
+ &imp->imp_committed_list,
+ rq_replay_list) {
+ ptlrpc_free_request(free_req);
+ }
+
+ list_for_each_entry_safe(free_req, tmp,
+ &imp->imp_delayed_list,
+ rq_list) {
+ spin_lock(&free_req->rq_lock);
+ free_req->rq_err = 1;
+ free_req->rq_status = -EIO;
+ ptlrpc_client_wake_req(free_req);
+ spin_unlock(&free_req->rq_lock);
+ }
+
+ list_for_each_entry_safe(free_req, tmp,
+ &imp->imp_sending_list,
+ rq_list) {
+ spin_lock(&free_req->rq_lock);
+ free_req->rq_err = 1;
+ free_req->rq_status = -EIO;
+ ptlrpc_client_wake_req(free_req);
+ spin_unlock(&free_req->rq_lock);
+ }
+ spin_unlock(&imp->imp_lock);
+ }
} else {
/* Put it back for re-replay. */
lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);