From 069a9cf551c2e985ea254a1c570b22ed1d72d914 Mon Sep 17 00:00:00 2001
From: Fan Yong
Date: Wed, 25 Nov 2015 06:47:59 +0800
Subject: [PATCH 1/1] LU-6684 lfsck: set the lfsck notify as interruptible

When the LFSCK engine notifies a remote LFSCK engine about some LFSCK
event, such as LE_PHASE1_DONE, and the remote server (MDT or OST) is
offline, the notification RPC blocks until the remote server comes
back online. Anyone who wants to stop the LFSCK in the meantime is
forced to wait. To avoid such trouble, mark the LFSCK notification RPC
as interruptible, so that a running LFSCK can be stopped even while
some remote server is offline.

Signed-off-by: Fan Yong
Change-Id: Ie9220bc578eb9fe1b1b804a6732fe8ecfba4affb
Reviewed-on: http://review.whamcloud.com/18082
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Lai Siyao
Reviewed-by: Liang Zhen
Reviewed-by: Oleg Drokin
---
 lustre/lfsck/lfsck_layout.c |  2 +
 lustre/lfsck/lfsck_lib.c    |  1 +
 lustre/ptlrpc/client.c      | 92 ++++++++++++++++++++++-----------------------
 3 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c
index a74a9e8..66732d5 100644
--- a/lustre/lfsck/lfsck_layout.c
+++ b/lustre/lfsck/lfsck_layout.c
@@ -3516,6 +3516,7 @@ static int lfsck_layout_async_query(const struct lu_env *env,
 	llsaa->llsaa_com = lfsck_component_get(com);
 	llsaa->llsaa_llst = llst;
 	req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
+	req->rq_allow_intr = 1;
 	ptlrpc_set_add_req(set, req);
 
 	RETURN(0);
@@ -3544,6 +3545,7 @@ static int lfsck_layout_async_notify(const struct lu_env *env,
 	tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
 	*tmp = *lr;
 	ptlrpc_request_set_replen(req);
+	req->rq_allow_intr = 1;
 	ptlrpc_set_add_req(set, req);
 
 	RETURN(0);
diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c
index 1a45e33..139c95b 100644
--- a/lustre/lfsck/lfsck_lib.c
+++ b/lustre/lfsck/lfsck_lib.c
@@ -2413,6 +2413,7 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
 	if (laia->laia_com != NULL)
 		lfsck_component_get(laia->laia_com);
 	req->rq_interpret_reply = interpreter;
+	req->rq_allow_intr = 1;
 	ptlrpc_set_add_req(set, req);
 
 	return 0;
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 0c62264..bb7fe52 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1048,6 +1048,9 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
 {
 	LASSERT(list_empty(&req->rq_set_chain));
 
+	if (req->rq_allow_intr)
+		set->set_allow_intr = 1;
+
 	/* The set takes over the caller's request reference */
 	list_add_tail(&req->rq_set_chain, &set->set_requests);
 	req->rq_set = set;
@@ -1651,8 +1654,14 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 		struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request,
 							rq_set_chain);
 		struct obd_import *imp = req->rq_import;
 		int unregistered = 0;
+		int async = 1;
 		int rc = 0;
 
+		if (req->rq_phase == RQ_PHASE_COMPLETE) {
+			list_move_tail(&req->rq_set_chain, &comp_reqs);
+			continue;
+		}
+
 		/* This schedule point is mainly for the ptlrpcd caller of this
 		 * function. Most ptlrpc sets are not long-lived and unbounded
 		 * in length, but at the least the set used by the ptlrpcd is.
@@ -1669,16 +1678,18 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 			req->rq_status = -EINTR;
 			ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+			/* Since it is interpreted and we have to wait for
+			 * the reply to be unlinked, use sync mode. */
+			async = 0;
+
 			GOTO(interpret, req->rq_status);
 		}
 
-		if (req->rq_phase == RQ_PHASE_NEW &&
-		    ptlrpc_send_new_req(req)) {
-			force_timer_recalc = 1;
-		}
+		if (req->rq_phase == RQ_PHASE_NEW && ptlrpc_send_new_req(req))
+			force_timer_recalc = 1;
 
-		/* delayed send - skip */
-		if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
+		/* delayed send - skip */
+		if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
 			continue;
 
 		/* delayed resend - skip */
@@ -1686,11 +1697,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 		    req->rq_sent > cfs_time_current_sec())
 			continue;
 
-		if (!(req->rq_phase == RQ_PHASE_RPC ||
-		      req->rq_phase == RQ_PHASE_BULK ||
-		      req->rq_phase == RQ_PHASE_INTERPRET ||
-		      req->rq_phase == RQ_PHASE_UNREGISTERING ||
-		      req->rq_phase == RQ_PHASE_COMPLETE)) {
+		if (!(req->rq_phase == RQ_PHASE_RPC ||
+		      req->rq_phase == RQ_PHASE_BULK ||
+		      req->rq_phase == RQ_PHASE_INTERPRET ||
+		      req->rq_phase == RQ_PHASE_UNREGISTERING)) {
 			DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
 			LBUG();
 		}
@@ -1730,11 +1740,6 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 			ptlrpc_rqphase_move(req, req->rq_next_phase);
 		}
 
-		if (req->rq_phase == RQ_PHASE_COMPLETE) {
-			list_move_tail(&req->rq_set_chain, &comp_reqs);
-			continue;
-		}
-
 		if (req->rq_phase == RQ_PHASE_INTERPRET)
 			GOTO(interpret, req->rq_status);
@@ -1951,27 +1956,27 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 			req->rq_status = -EIO;
 		}
 
-		ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+		ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
 
-	interpret:
-		LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+	interpret:
+		LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
 
-		/* This moves to "unregistering" phase we need to wait for
-		 * reply unlink. */
-		if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
-			/* start async bulk unlink too */
-			ptlrpc_unregister_bulk(req, 1);
-			continue;
-		}
+		/* This moves to "unregistering" phase, and we need to wait
+		 * for the reply to be unlinked. */
+		if (!unregistered && !ptlrpc_unregister_reply(req, async)) {
+			/* start async bulk unlink too */
+			ptlrpc_unregister_bulk(req, 1);
+			continue;
+		}
 
-		if (!ptlrpc_unregister_bulk(req, 1))
-			continue;
+		if (!ptlrpc_unregister_bulk(req, async))
+			continue;
 
-		/* When calling interpret receiving already should be
-		 * finished. */
-		LASSERT(!req->rq_receiving_reply);
+		/* When calling interpret, receiving should already be
+		 * finished. */
+		LASSERT(!req->rq_receiving_reply);
 
-		ptlrpc_req_interpret(env, req, req->rq_status);
+		ptlrpc_req_interpret(env, req, req->rq_status);
 
 		if (ptlrpcd_check_work(req)) {
 			atomic_dec(&set->set_remaining);
@@ -2180,6 +2185,9 @@ static void ptlrpc_interrupted_set(void *data)
 		struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request,
 							rq_set_chain);
 
+		if (req->rq_intr)
+			continue;
+
 		if (req->rq_phase != RQ_PHASE_RPC &&
 		    req->rq_phase != RQ_PHASE_UNREGISTERING &&
 		    !req->rq_allow_intr)
@@ -2274,17 +2282,12 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 		CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
 		       set, timeout);
 
-		if (timeout == 0 && !signal_pending(current))
-			/*
-			 * No requests are in-flight (ether timed out
-			 * or delayed), so we can allow interrupts.
-			 * We still want to block for a limited time,
-			 * so we allow interrupts during the timeout.
-			 */
-			lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
-						   ptlrpc_expired_set,
-						   ptlrpc_interrupted_set, set);
-		else if (set->set_allow_intr)
+		if ((timeout == 0 && !signal_pending(current)) ||
+		    set->set_allow_intr)
+			/* No requests are in-flight (either timed out
+			 * or delayed), so we can allow interrupts.
+			 * We still want to block for a limited time,
+			 * so we allow interrupts during the timeout. */
 			lwi = LWI_TIMEOUT_INTR_ALL(
 				cfs_time_seconds(timeout ? timeout : 1),
 				ptlrpc_expired_set,
 				ptlrpc_interrupted_set, set);
@@ -2834,9 +2837,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 		RETURN(-ENOMEM);
 	}
 
-	if (req->rq_allow_intr)
-		set->set_allow_intr = 1;
-
 	/* for distributed debugging */
 	lustre_msg_set_status(req->rq_reqmsg, current_pid());
-- 
1.8.3.1
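
For context, below is a minimal caller-side sketch of the pattern this
patch standardizes, modeled on lfsck_layout_async_notify() above. It is
a sketch only, not part of the commit: the function name
lfsck_notify_interruptible() is hypothetical, the RQF_LFSCK_NOTIFY
format and LFSCK_NOTIFY opcode are assumed to match what the existing
LFSCK code uses, and the usual ENTRY/RETURN debug macros are omitted
for brevity.

/* Sketch (assumed helpers as noted above): build an LFSCK notification
 * RPC that a signal can interrupt while the target is offline. */
static int lfsck_notify_interruptible(struct obd_export *exp,
				      struct lfsck_request *lr,
				      struct ptlrpc_request_set *set)
{
	struct ptlrpc_request *req;
	struct lfsck_request *tmp;
	int rc;

	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_NOTIFY);
	if (req == NULL)
		return -ENOMEM;

	rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_NOTIFY);
	if (rc != 0) {
		ptlrpc_request_free(req);
		return rc;
	}

	tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
	*tmp = *lr;
	ptlrpc_request_set_replen(req);

	/* The key line from this patch: mark the request interruptible
	 * before queueing it, so a pending signal can break the wait
	 * even while the RPC is blocked on an offline MDT/OST. */
	req->rq_allow_intr = 1;

	ptlrpc_set_add_req(set, req);
	return 0;
}

Since ptlrpc_set_add_req() now propagates rq_allow_intr into
set->set_allow_intr, ptlrpc_set_wait() sleeps under
LWI_TIMEOUT_INTR_ALL() and can be woken by a signal, which is what
allows a running LFSCK to be stopped while a remote target is offline.
Moving that propagation out of ptlrpc_queue_wait() and into
ptlrpc_set_add_req() also covers requests queued on ptlrpcd-managed
sets, not only synchronous ptlrpc_queue_wait() callers.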