Whamcloud - gitweb
LU-6684 lfsck: set the lfsck notify as interruptable 82/18082/8
authorFan Yong <fan.yong@intel.com>
Tue, 24 Nov 2015 22:47:59 +0000 (06:47 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 2 Feb 2016 04:30:38 +0000 (04:30 +0000)
When the LFSCK engine notifies a remote LFSCK engine about some
LFSCK event, such as LE_PHASE1_DONE, and the remote server (MDT
or OST) is offline, the notification RPC is blocked until the
remote server comes back online. If someone wants to stop the
LFSCK during that time, he/she has to wait.

To avoid this problem, make the LFSCK notification RPC
interruptible. Then, even if some remote server is offline, the
running LFSCK can still be stopped.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ie9220bc578eb9fe1b1b804a6732fe8ecfba4affb
Reviewed-on: http://review.whamcloud.com/18082
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_lib.c
lustre/ptlrpc/client.c

index a74a9e8..66732d5 100644 (file)
@@ -3516,6 +3516,7 @@ static int lfsck_layout_async_query(const struct lu_env *env,
        llsaa->llsaa_com = lfsck_component_get(com);
        llsaa->llsaa_llst = llst;
        req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
        llsaa->llsaa_com = lfsck_component_get(com);
        llsaa->llsaa_llst = llst;
        req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
+       req->rq_allow_intr = 1;
        ptlrpc_set_add_req(set, req);
 
        RETURN(0);
        ptlrpc_set_add_req(set, req);
 
        RETURN(0);
@@ -3544,6 +3545,7 @@ static int lfsck_layout_async_notify(const struct lu_env *env,
        tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
        *tmp = *lr;
        ptlrpc_request_set_replen(req);
        tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
        *tmp = *lr;
        ptlrpc_request_set_replen(req);
+       req->rq_allow_intr = 1;
        ptlrpc_set_add_req(set, req);
 
        RETURN(0);
        ptlrpc_set_add_req(set, req);
 
        RETURN(0);
index 1a45e33..139c95b 100644 (file)
@@ -2413,6 +2413,7 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
        if (laia->laia_com != NULL)
                lfsck_component_get(laia->laia_com);
        req->rq_interpret_reply = interpreter;
        if (laia->laia_com != NULL)
                lfsck_component_get(laia->laia_com);
        req->rq_interpret_reply = interpreter;
+       req->rq_allow_intr = 1;
        ptlrpc_set_add_req(set, req);
 
        return 0;
        ptlrpc_set_add_req(set, req);
 
        return 0;
index 0c62264..bb7fe52 100644 (file)
@@ -1048,6 +1048,9 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
 {
        LASSERT(list_empty(&req->rq_set_chain));
 
 {
        LASSERT(list_empty(&req->rq_set_chain));
 
+       if (req->rq_allow_intr)
+               set->set_allow_intr = 1;
+
        /* The set takes over the caller's request reference */
        list_add_tail(&req->rq_set_chain, &set->set_requests);
        req->rq_set = set;
        /* The set takes over the caller's request reference */
        list_add_tail(&req->rq_set_chain, &set->set_requests);
        req->rq_set = set;
@@ -1651,8 +1654,14 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                                   rq_set_chain);
                struct obd_import *imp = req->rq_import;
                int unregistered = 0;
                                   rq_set_chain);
                struct obd_import *imp = req->rq_import;
                int unregistered = 0;
+               int async = 1;
                int rc = 0;
 
                int rc = 0;
 
+               if (req->rq_phase == RQ_PHASE_COMPLETE) {
+                       list_move_tail(&req->rq_set_chain, &comp_reqs);
+                       continue;
+               }
+
                /* This schedule point is mainly for the ptlrpcd caller of this
                 * function.  Most ptlrpc sets are not long-lived and unbounded
                 * in length, but at the least the set used by the ptlrpcd is.
                /* This schedule point is mainly for the ptlrpcd caller of this
                 * function.  Most ptlrpc sets are not long-lived and unbounded
                 * in length, but at the least the set used by the ptlrpcd is.
@@ -1669,16 +1678,18 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                        req->rq_status = -EINTR;
                        ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
 
                        req->rq_status = -EINTR;
                        ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
 
+                       /* Since it is interpreted and we have to wait for
+                        * the reply to be unlinked, then use sync mode. */
+                       async = 0;
+
                        GOTO(interpret, req->rq_status);
                }
 
                        GOTO(interpret, req->rq_status);
                }
 
-                if (req->rq_phase == RQ_PHASE_NEW &&
-                    ptlrpc_send_new_req(req)) {
-                        force_timer_recalc = 1;
-                }
+               if (req->rq_phase == RQ_PHASE_NEW && ptlrpc_send_new_req(req))
+                       force_timer_recalc = 1;
 
 
-                /* delayed send - skip */
-                if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
+               /* delayed send - skip */
+               if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
                        continue;
 
                /* delayed resend - skip */
                        continue;
 
                /* delayed resend - skip */
@@ -1686,11 +1697,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                    req->rq_sent > cfs_time_current_sec())
                        continue;
 
                    req->rq_sent > cfs_time_current_sec())
                        continue;
 
-                if (!(req->rq_phase == RQ_PHASE_RPC ||
-                      req->rq_phase == RQ_PHASE_BULK ||
-                      req->rq_phase == RQ_PHASE_INTERPRET ||
-                      req->rq_phase == RQ_PHASE_UNREGISTERING ||
-                      req->rq_phase == RQ_PHASE_COMPLETE)) {
+               if (!(req->rq_phase == RQ_PHASE_RPC ||
+                     req->rq_phase == RQ_PHASE_BULK ||
+                     req->rq_phase == RQ_PHASE_INTERPRET ||
+                     req->rq_phase == RQ_PHASE_UNREGISTERING)) {
                         DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
                         LBUG();
                 }
                         DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
                         LBUG();
                 }
@@ -1730,11 +1740,6 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                         ptlrpc_rqphase_move(req, req->rq_next_phase);
                 }
 
                         ptlrpc_rqphase_move(req, req->rq_next_phase);
                 }
 
-                if (req->rq_phase == RQ_PHASE_COMPLETE) {
-                       list_move_tail(&req->rq_set_chain, &comp_reqs);
-                        continue;
-               }
-
                 if (req->rq_phase == RQ_PHASE_INTERPRET)
                         GOTO(interpret, req->rq_status);
 
                 if (req->rq_phase == RQ_PHASE_INTERPRET)
                         GOTO(interpret, req->rq_status);
 
@@ -1951,27 +1956,27 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                        req->rq_status = -EIO;
                }
 
                        req->rq_status = -EIO;
                }
 
-                ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+               ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
 
 
-        interpret:
-                LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+       interpret:
+               LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
 
 
-                /* This moves to "unregistering" phase we need to wait for
-                 * reply unlink. */
-                if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
-                        /* start async bulk unlink too */
-                        ptlrpc_unregister_bulk(req, 1);
-                        continue;
-                }
+               /* This moves to "unregistering" phase we need to wait for
+                * reply unlink. */
+               if (!unregistered && !ptlrpc_unregister_reply(req, async)) {
+                       /* start async bulk unlink too */
+                       ptlrpc_unregister_bulk(req, 1);
+                       continue;
+               }
 
 
-                if (!ptlrpc_unregister_bulk(req, 1))
-                        continue;
+               if (!ptlrpc_unregister_bulk(req, async))
+                       continue;
 
 
-                /* When calling interpret receiving already should be
-                 * finished. */
-                LASSERT(!req->rq_receiving_reply);
+               /* When calling interpret receiving already should be
+                * finished. */
+               LASSERT(!req->rq_receiving_reply);
 
 
-                ptlrpc_req_interpret(env, req, req->rq_status);
+               ptlrpc_req_interpret(env, req, req->rq_status);
 
                if (ptlrpcd_check_work(req)) {
                        atomic_dec(&set->set_remaining);
 
                if (ptlrpcd_check_work(req)) {
                        atomic_dec(&set->set_remaining);
@@ -2180,6 +2185,9 @@ static void ptlrpc_interrupted_set(void *data)
                struct ptlrpc_request *req =
                        list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
                struct ptlrpc_request *req =
                        list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
+               if (req->rq_intr)
+                       continue;
+
                if (req->rq_phase != RQ_PHASE_RPC &&
                    req->rq_phase != RQ_PHASE_UNREGISTERING &&
                    !req->rq_allow_intr)
                if (req->rq_phase != RQ_PHASE_RPC &&
                    req->rq_phase != RQ_PHASE_UNREGISTERING &&
                    !req->rq_allow_intr)
@@ -2274,17 +2282,12 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
                        set, timeout);
 
                 CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
                        set, timeout);
 
-               if (timeout == 0 && !signal_pending(current))
-                        /*
-                         * No requests are in-flight (ether timed out
-                         * or delayed), so we can allow interrupts.
-                         * We still want to block for a limited time,
-                         * so we allow interrupts during the timeout.
-                         */
-                       lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
-                                                   ptlrpc_expired_set,
-                                                   ptlrpc_interrupted_set, set);
-               else if (set->set_allow_intr)
+               if ((timeout == 0 && !signal_pending(current)) ||
+                   set->set_allow_intr)
+                       /* No requests are in-flight (ether timed out
+                        * or delayed), so we can allow interrupts.
+                        * We still want to block for a limited time,
+                        * so we allow interrupts during the timeout. */
                        lwi = LWI_TIMEOUT_INTR_ALL(
                                        cfs_time_seconds(timeout ? timeout : 1),
                                        ptlrpc_expired_set,
                        lwi = LWI_TIMEOUT_INTR_ALL(
                                        cfs_time_seconds(timeout ? timeout : 1),
                                        ptlrpc_expired_set,
@@ -2834,9 +2837,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                RETURN(-ENOMEM);
        }
 
                RETURN(-ENOMEM);
        }
 
-       if (req->rq_allow_intr)
-               set->set_allow_intr = 1;
-
        /* for distributed debugging */
        lustre_msg_set_status(req->rq_reqmsg, current_pid());
 
        /* for distributed debugging */
        lustre_msg_set_status(req->rq_reqmsg, current_pid());