From: Di Wang <di.wang@intel.com>
Date: Fri, 13 Nov 2015 16:55:07 +0000 (-0800)
Subject: LU-7490 recovery: abort update recovery once fails
X-Git-Tag: 2.7.66~12
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=92890d8f555d12ad32dc9841a328e84c5d26e896

LU-7490 recovery: abort update recovery once fails

If update or MDT-MDT recovery fails, then we abort
the replay and resend, because further updates might
cause filesystem or llog corruption.

Signed-off-by: Di Wang <di.wang@intel.com>
Change-Id: Icc7241e94159f7f46a99fb003643605fe2a13c8d
Reviewed-on: http://review.whamcloud.com/17199
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---

diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index fd4f142..43d29c3 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -2126,7 +2126,8 @@ static __u64 get_next_replay_req_transno(struct obd_device *obd)
 
 	return transno;
 }
-__u64 get_next_transno(struct lu_target *lut, int *type)
+
+static __u64 get_next_transno(struct lu_target *lut, int *type)
 {
 	struct obd_device *obd = lut->lut_obd;
 	struct target_distribute_txn_data *tdtd = lut->lut_tdtd;
@@ -2221,6 +2222,7 @@ static void replay_request_or_update(struct lu_env *env,
 					     exp_req_replay_healthy)) {
 			abort_req_replay_queue(obd);
 			abort_lock_replay_queue(obd);
+			goto abort;
 		}
 
 		spin_lock(&obd->obd_recovery_task_lock);
@@ -2273,32 +2275,28 @@ static void replay_request_or_update(struct lu_env *env,
 			obd->obd_replayed_requests++;
 		} else if (type == UPDATE_RECOVERY && transno != 0) {
 			struct distribute_txn_replay_req *dtrq;
-			bool update_transno = false;
+			int rc;
 
 			spin_unlock(&obd->obd_recovery_task_lock);
 
 			LASSERT(tdtd != NULL);
 			dtrq = distribute_txn_get_next_req(tdtd);
 			lu_context_enter(&thread->t_env->le_ctx);
-			tdtd->tdtd_replay_handler(env, tdtd, dtrq);
+			rc = tdtd->tdtd_replay_handler(env, tdtd, dtrq);
 			lu_context_exit(&thread->t_env->le_ctx);
 			extend_recovery_timer(obd, obd_timeout, true);
 
-			/* Add it to the replay finish list */
-			spin_lock(&tdtd->tdtd_replay_list_lock);
-			if (dtrq->dtrq_xid != 0) {
+			if (rc == 0 && dtrq->dtrq_xid != 0) {
 				CDEBUG(D_HA, "Move x"LPU64" t"LPU64
 				       " to finish list\n", dtrq->dtrq_xid,
 				       dtrq->dtrq_master_transno);
+
+				/* Add it to the replay finish list */
+				spin_lock(&tdtd->tdtd_replay_list_lock);
 				list_add(&dtrq->dtrq_list,
 					 &tdtd->tdtd_replay_finish_list);
-				update_transno = true;
-			} else {
-				dtrq_destroy(dtrq);
-			}
-			spin_unlock(&tdtd->tdtd_replay_list_lock);
+				spin_unlock(&tdtd->tdtd_replay_list_lock);
 
-			if (update_transno) {
 				spin_lock(&obd->obd_recovery_task_lock);
 				if (transno == obd->obd_next_recovery_transno)
 					obd->obd_next_recovery_transno++;
@@ -2307,9 +2305,17 @@ static void replay_request_or_update(struct lu_env *env,
 					obd->obd_next_recovery_transno =
 								transno + 1;
 				spin_unlock(&obd->obd_recovery_task_lock);
+			} else {
+				dtrq_destroy(dtrq);
+				/* If update recovery fails, then let's abort
+				 * the recovery, otherwise it might cause
+				 * both llog and filesystem corruption */
+				if (rc < 0)
+					obd->obd_force_abort_recovery = 1;
 			}
 		} else {
 			spin_unlock(&obd->obd_recovery_task_lock);
+abort:
 			LASSERT(list_empty(&obd->obd_req_replay_queue));
 			LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
 			/** evict exports failed VBR */
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index d665545..16a72d3 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -2927,6 +2927,49 @@ static int ptlrpc_replay_interpret(const struct lu_env *env,
                 DEBUG_REQ(D_ERROR, req, "status %d, old was %d",
                           lustre_msg_get_status(req->rq_repmsg),
                           aa->praa_old_status);
+
+		/* Note: If the replay fails for MDT-MDT recovery, let's
+		 * abort all of the following requests in the replay
+		 * and sending list, because MDT-MDT update requests
+		 * are dependent on each other, see LU-7039 */
+		if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) {
+			struct ptlrpc_request *free_req;
+			struct ptlrpc_request *tmp;
+
+			spin_lock(&imp->imp_lock);
+			list_for_each_entry_safe(free_req, tmp,
+						 &imp->imp_replay_list,
+						 rq_replay_list) {
+				ptlrpc_free_request(free_req);
+			}
+
+			list_for_each_entry_safe(free_req, tmp,
+						 &imp->imp_committed_list,
+						 rq_replay_list) {
+				ptlrpc_free_request(free_req);
+			}
+
+			list_for_each_entry_safe(free_req, tmp,
+						&imp->imp_delayed_list,
+						rq_list) {
+				spin_lock(&free_req->rq_lock);
+				free_req->rq_err = 1;
+				free_req->rq_status = -EIO;
+				ptlrpc_client_wake_req(free_req);
+				spin_unlock(&free_req->rq_lock);
+			}
+
+			list_for_each_entry_safe(free_req, tmp,
+						&imp->imp_sending_list,
+						rq_list) {
+				spin_lock(&free_req->rq_lock);
+				free_req->rq_err = 1;
+				free_req->rq_status = -EIO;
+				ptlrpc_client_wake_req(free_req);
+				spin_unlock(&free_req->rq_lock);
+			}
+			spin_unlock(&imp->imp_lock);
+		}
         } else {
                 /* Put it back for re-replay. */
                 lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);