Whamcloud - gitweb
LU-6684 lfsck: stop lfsck even if some servers offline 32/17032/6
authorFan Yong <fan.yong@intel.com>
Wed, 23 Sep 2015 05:40:46 +0000 (13:40 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 14 Jan 2016 03:59:26 +0000 (03:59 +0000)
It is possible that during the LFSCK scanning, some server, MDT
or OST, may be offline. At that time, if the LFSCK needs to talk
with such an offline server, the related RPC will trigger a reconnect
to the offline server, and the LFSCK engine has to wait there until
the offline server comes back online or someone deactivates the server
by force. In such a case, if the admin wants to stop the LFSCK,
the stop request will be blocked. That is NOT good behavior.

This patch allows the lfsck_stop sponsor to send a SIGINT signal
to the LFSCK engine to wake it up from the infinite waiting
state, so that the LFSCK can be stopped even if some servers are
offline.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I07e7ae7ca98ebf213888b58d615ae8001d28afbe
Reviewed-on: http://review.whamcloud.com/17032
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre_net.h
lustre/include/obd_support.h
lustre/lfsck/lfsck_engine.c
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_lib.c
lustre/lfsck/lfsck_namespace.c
lustre/obdclass/obd_mount_server.c
lustre/osp/osp_trans.c
lustre/ptlrpc/client.c
lustre/tests/sanity-lfsck.sh

index 5ba35e3..7512eaa 100644 (file)
@@ -605,6 +605,7 @@ struct ptlrpc_request_set {
        set_producer_func       set_producer;
        /** opaq argument passed to the producer callback */
        void                    *set_producer_arg;
+       unsigned int             set_allow_intr:1;
 };
 
 /**
@@ -990,7 +991,8 @@ struct ptlrpc_request {
                 * status */
                rq_allow_replay:1,
                /* bulk request, sent to server, but uncommitted */
-               rq_unstable:1;
+               rq_unstable:1,
+               rq_allow_intr:1;
        /** @} */
 
        /** server-side flags @{ */
index 439c409..c1056ee 100644 (file)
@@ -557,6 +557,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV  0x162a
 #define OBD_FAIL_LFSCK_BAD_SLAVE_LMV   0x162b
 #define OBD_FAIL_LFSCK_BAD_SLAVE_NAME  0x162c
+#define OBD_FAIL_LFSCK_ASSISTANT_DIRECT        0x162d
 
 #define OBD_FAIL_LFSCK_NOTIFY_NET      0x16f0
 #define OBD_FAIL_LFSCK_QUERY_NET       0x16f1
index d1053c1..d63f893 100644 (file)
@@ -1577,6 +1577,7 @@ int lfsck_assistant_engine(void *args)
        }
 
        spin_lock(&lad->lad_lock);
+       lad->lad_task = current;
        thread_set_flags(athread, SVC_RUNNING);
        spin_unlock(&lad->lad_lock);
        wake_up_all(&mthread->t_ctl_waitq);
@@ -1820,6 +1821,7 @@ fini:
        lad->lad_assistant_status = (rc1 != 0 ? rc1 : rc);
        thread_set_flags(athread, SVC_STOPPED);
        wake_up_all(&mthread->t_ctl_waitq);
+       lad->lad_task = NULL;
        spin_unlock(&lad->lad_lock);
 
        CDEBUG(D_LFSCK, "%s: %s LFSCK assistant thread exit: rc = %d\n",
index 9e0b3af..f04144e 100644 (file)
@@ -817,6 +817,7 @@ struct lfsck_assistant_data {
 
        const char                              *lad_name;
        struct ptlrpc_thread                     lad_thread;
+       struct task_struct                      *lad_task;
 
        struct lfsck_assistant_operations       *lad_ops;
 
index 4630c9f..a74a9e8 100644 (file)
@@ -3248,6 +3248,8 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env,
        if (lso->lso_dead)
                RETURN(0);
 
+       CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ASSISTANT_DIRECT, cfs_fail_val);
+
        rc = dt_attr_get(env, child, cla);
        if (rc == -ENOENT) {
                parent = lfsck_assistant_object_load(env, lfsck, lso);
@@ -3968,8 +3970,8 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env,
        up_write(&com->lc_sem);
 
        CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos ["
-              LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck),
-              lfsck->li_pos_current.lp_oit_cookie, rc);
+              LPU64"], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck),
+              lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc);
 
        return rc;
 }
@@ -4002,8 +4004,8 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
        up_write(&com->lc_sem);
 
        CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos ["
-              LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck),
-              lfsck->li_pos_current.lp_oit_cookie, rc);
+              LPU64"], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck),
+              lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc);
 
        return rc;
 }
@@ -4276,13 +4278,16 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
                        goto next;
                }
 
-               rc = dt_declare_attr_get(env, cobj);
-               if (rc != 0)
-                       goto next;
+               if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_ASSISTANT_DIRECT)) {
+                       rc = dt_declare_attr_get(env, cobj);
+                       if (rc != 0)
+                               goto next;
 
-               rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID);
-               if (rc != 0)
-                       goto next;
+                       rc = dt_declare_xattr_get(env, cobj, &buf,
+                                                 XATTR_NAME_FID);
+                       if (rc != 0)
+                               goto next;
+               }
 
                if (lso == NULL) {
                        struct lu_attr *attr = &info->lti_la;
@@ -4679,13 +4684,13 @@ static int lfsck_layout_slave_post(const struct lu_env *env,
        int                      rc;
        bool                     done  = false;
 
+       down_write(&com->lc_sem);
        rc = lfsck_layout_lastid_store(env, com);
        if (rc != 0)
                result = rc;
 
        LASSERT(lfsck->li_out_notify != NULL);
 
-       down_write(&com->lc_sem);
        spin_lock(&lfsck->li_lock);
        if (!init)
                lo->ll_pos_last_checkpoint =
@@ -5148,12 +5153,14 @@ static void lfsck_layout_slave_quit(const struct lu_env *env,
 
        LASSERT(llsd != NULL);
 
+       down_write(&com->lc_sem);
        list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list,
                                 lls_list) {
                list_del_init(&lls->lls_list);
                lfsck_object_put(env, lls->lls_lastid_obj);
                OBD_FREE_PTR(lls);
        }
+       up_write(&com->lc_sem);
 
        spin_lock(&llsd->llsd_lock);
        while (!list_empty(&llsd->llsd_master_list)) {
index 7158337..1a45e33 100644 (file)
@@ -31,6 +31,7 @@
 #define DEBUG_SUBSYSTEM S_LFSCK
 
 #include <linux/kthread.h>
+#include <linux/sched.h>
 #include <libcfs/list.h>
 #include <lu_object.h>
 #include <dt_object.h>
@@ -2497,6 +2498,9 @@ void lfsck_post_generic(const struct lu_env *env,
                lad->lad_exit = 1;
        lad->lad_to_post = 1;
 
+       CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s post, rc = %d\n",
+              lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result);
+
        wake_up_all(&athread->t_ctl_waitq);
        l_wait_event(mthread->t_ctl_waitq,
                     (*result > 0 && list_empty(&lad->lad_req_list)) ||
@@ -2505,6 +2509,9 @@ void lfsck_post_generic(const struct lu_env *env,
 
        if (lad->lad_assistant_status < 0)
                *result = lad->lad_assistant_status;
+
+       CDEBUG(D_LFSCK, "%s: the assistant has done %s post, rc = %d\n",
+              lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result);
 }
 
 int lfsck_double_scan_generic(const struct lu_env *env,
@@ -2520,12 +2527,20 @@ int lfsck_double_scan_generic(const struct lu_env *env,
        else
                lad->lad_to_double_scan = 1;
 
+       CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s double_scan, "
+              "status %d\n",
+              lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, status);
+
        wake_up_all(&athread->t_ctl_waitq);
        l_wait_event(mthread->t_ctl_waitq,
                     lad->lad_in_double_scan ||
                     thread_is_stopped(athread),
                     &lwi);
 
+       CDEBUG(D_LFSCK, "%s: the assistant has done %s double_scan, "
+              "status %d\n", lfsck_lfsck2name(com->lc_lfsck), lad->lad_name,
+              lad->lad_assistant_status);
+
        if (lad->lad_assistant_status < 0)
                return lad->lad_assistant_status;
 
@@ -3143,6 +3158,28 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
        }
 
        thread_set_flags(thread, SVC_STOPPING);
+
+       if (lfsck->li_master) {
+               struct lfsck_component *com;
+               struct lfsck_assistant_data *lad;
+
+               list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+                       lad = com->lc_data;
+                       spin_lock(&lad->lad_lock);
+                       if (lad->lad_task != NULL)
+                               force_sig(SIGINT, lad->lad_task);
+                       spin_unlock(&lad->lad_lock);
+               }
+
+               list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
+                       lad = com->lc_data;
+                       spin_lock(&lad->lad_lock);
+                       if (lad->lad_task != NULL)
+                               force_sig(SIGINT, lad->lad_task);
+                       spin_unlock(&lad->lad_lock);
+               }
+       }
+
        spin_unlock(&lfsck->li_lock);
 
        wake_up_all(&thread->t_ctl_waitq);
index bfbed00..fa448bb 100644 (file)
@@ -3931,10 +3931,10 @@ static int lfsck_namespace_checkpoint(const struct lu_env *env,
 
 log:
        CDEBUG(D_LFSCK, "%s: namespace LFSCK checkpoint at the pos ["LPU64
-              ", "DFID", "LPX64"]: rc = %d\n", lfsck_lfsck2name(lfsck),
-              lfsck->li_pos_current.lp_oit_cookie,
+              ", "DFID", "LPX64"], status = %d: rc = %d\n",
+              lfsck_lfsck2name(lfsck), lfsck->li_pos_current.lp_oit_cookie,
               PFID(&lfsck->li_pos_current.lp_dir_parent),
-              lfsck->li_pos_current.lp_dir_cookie, rc);
+              lfsck->li_pos_current.lp_dir_cookie, ns->ln_status, rc);
 
        return rc > 0 ? 0 : rc;
 }
index 4452f05..45717b1 100644 (file)
@@ -477,7 +477,7 @@ struct obd_export *lustre_find_lwp_by_index(const char *dev, __u32 idx)
        list_for_each_entry(lwp, &lsi->lsi_lwp_list, obd_lwp_list) {
                char *ptr = strstr(lwp->obd_name, lwp_name);
 
-               if (ptr != NULL) {
+               if (ptr != NULL && lwp->obd_lwp_export != NULL) {
                        exp = class_export_get(lwp->obd_lwp_export);
                        break;
                }
index 9c2f914..f6a5bbf 100644 (file)
@@ -454,6 +454,7 @@ int osp_remote_sync(const struct lu_env *env, struct osp_device *osp,
         * might be used to retrieve update log during recovery process, so
         * it will be allowed to send during recovery process */
        req->rq_allow_replay = 1;
+       req->rq_allow_intr = 1;
 
        /* Note: some dt index api might return non-zero result here, like
         * osd_index_ea_lookup, so we should only check rc < 0 here */
index e672a30..d665545 100644 (file)
@@ -1661,6 +1661,17 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                 */
                cond_resched();
 
+               /* If the caller allows the request to be interrupted by force
+                * and it has really been interrupted, then move the request
+                * to RQ_PHASE_INTERPRET phase regardless of what the current
+                * phase is. */
+               if (unlikely(req->rq_allow_intr && req->rq_intr)) {
+                       req->rq_status = -EINTR;
+                       ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+                       GOTO(interpret, req->rq_status);
+               }
+
                 if (req->rq_phase == RQ_PHASE_NEW &&
                     ptlrpc_send_new_req(req)) {
                         force_timer_recalc = 1;
@@ -2170,7 +2181,8 @@ static void ptlrpc_interrupted_set(void *data)
                        list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
                if (req->rq_phase != RQ_PHASE_RPC &&
-                   req->rq_phase != RQ_PHASE_UNREGISTERING)
+                   req->rq_phase != RQ_PHASE_UNREGISTERING &&
+                   !req->rq_allow_intr)
                        continue;
 
                ptlrpc_mark_interrupted(req);
@@ -2272,6 +2284,11 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                        lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
                                                    ptlrpc_expired_set,
                                                    ptlrpc_interrupted_set, set);
+               else if (set->set_allow_intr)
+                       lwi = LWI_TIMEOUT_INTR_ALL(
+                                       cfs_time_seconds(timeout ? timeout : 1),
+                                       ptlrpc_expired_set,
+                                       ptlrpc_interrupted_set, set);
                 else
                         /*
                          * At least one request is in flight, so no
@@ -2286,7 +2303,8 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 /* LU-769 - if we ignored the signal because it was already
                  * pending when we started, we need to handle it now or we risk
                  * it being ignored forever */
-               if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
+               if (rc == -ETIMEDOUT &&
+                   (!lwi.lwi_allow_intr || set->set_allow_intr) &&
                    signal_pending(current)) {
                        sigset_t blocked_sigs =
                                           cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
@@ -2816,6 +2834,9 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                RETURN(-ENOMEM);
        }
 
+       if (req->rq_allow_intr)
+               set->set_allow_intr = 1;
+
        /* for distributed debugging */
        lustre_msg_set_status(req->rq_reqmsg, current_pid());
 
index 10a892e..86a15ed 100644 (file)
@@ -4291,6 +4291,30 @@ test_31h() {
 }
 run_test 31h "Repair the corrupted shard's name entry"
 
+test_32()
+{
+       lfsck_prep 5 5
+       umount_client $MOUNT
+
+       #define OBD_FAIL_LFSCK_ASSISTANT_DIRECT 0x162d
+       do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x162d
+       $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
+
+       local STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "scanning-phase1" ] ||
+               error "(3) Expect 'scanning-phase1', but got '$STATUS'"
+
+       echo "stop ost1"
+       stop ost1 > /dev/null || error "(4) Fail to stop OST1!"
+
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       sleep 1
+
+       echo "stop LFSCK"
+       $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
+}
+run_test 32 "stop LFSCK when some OST failed"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}