From afcf3026c6ad203b9882eaeac76326357f26fe71 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 23 Sep 2015 13:40:46 +0800 Subject: [PATCH] LU-6684 lfsck: stop lfsck even if some servers offline It is possible that during the LFSCK scanning, some server, MDT or OST, may be offline. At that time, if the LFSCK needs to talk with such an offline server, the related RPC will trigger a reconnect to the offline server, and the LFSCK engine has to wait there until the offline server comes back online or someone deactivates the server by force. In such a case, if the admin wants to stop the LFSCK, the stop request will be blocked, which is not acceptable. This patch allows the lfsck_stop initiator to send a SIGINT signal to the LFSCK engine to wake it from the indefinite wait, so that the LFSCK can be stopped even if some servers are offline. Signed-off-by: Fan Yong Change-Id: I07e7ae7ca98ebf213888b58d615ae8001d28afbe Reviewed-on: http://review.whamcloud.com/17032 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/include/lustre_net.h | 4 +++- lustre/include/obd_support.h | 1 + lustre/lfsck/lfsck_engine.c | 2 ++ lustre/lfsck/lfsck_internal.h | 1 + lustre/lfsck/lfsck_layout.c | 29 ++++++++++++++++++----------- lustre/lfsck/lfsck_lib.c | 37 +++++++++++++++++++++++++++++++++++++ lustre/lfsck/lfsck_namespace.c | 6 +++--- lustre/obdclass/obd_mount_server.c | 2 +- lustre/osp/osp_trans.c | 1 + lustre/ptlrpc/client.c | 25 +++++++++++++++++++++++-- lustre/tests/sanity-lfsck.sh | 24 ++++++++++++++++++++++++ 11 files changed, 114 insertions(+), 18 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 5ba35e3..7512eaa 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -605,6 +605,7 @@ struct ptlrpc_request_set { set_producer_func set_producer; /** opaq argument passed to the producer callback */ void *set_producer_arg; + unsigned int set_allow_intr:1; }; /** @@ 
-990,7 +991,8 @@ struct ptlrpc_request { * status */ rq_allow_replay:1, /* bulk request, sent to server, but uncommitted */ - rq_unstable:1; + rq_unstable:1, + rq_allow_intr:1; /** @} */ /** server-side flags @{ */ diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 439c409..c1056ee 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -557,6 +557,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a #define OBD_FAIL_LFSCK_BAD_SLAVE_LMV 0x162b #define OBD_FAIL_LFSCK_BAD_SLAVE_NAME 0x162c +#define OBD_FAIL_LFSCK_ASSISTANT_DIRECT 0x162d #define OBD_FAIL_LFSCK_NOTIFY_NET 0x16f0 #define OBD_FAIL_LFSCK_QUERY_NET 0x16f1 diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index d1053c1..d63f893 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -1577,6 +1577,7 @@ int lfsck_assistant_engine(void *args) } spin_lock(&lad->lad_lock); + lad->lad_task = current; thread_set_flags(athread, SVC_RUNNING); spin_unlock(&lad->lad_lock); wake_up_all(&mthread->t_ctl_waitq); @@ -1820,6 +1821,7 @@ fini: lad->lad_assistant_status = (rc1 != 0 ? 
rc1 : rc); thread_set_flags(athread, SVC_STOPPED); wake_up_all(&mthread->t_ctl_waitq); + lad->lad_task = NULL; spin_unlock(&lad->lad_lock); CDEBUG(D_LFSCK, "%s: %s LFSCK assistant thread exit: rc = %d\n", diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index 9e0b3af..f04144e 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -817,6 +817,7 @@ struct lfsck_assistant_data { const char *lad_name; struct ptlrpc_thread lad_thread; + struct task_struct *lad_task; struct lfsck_assistant_operations *lad_ops; diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 4630c9f..a74a9e8 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -3248,6 +3248,8 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, if (lso->lso_dead) RETURN(0); + CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ASSISTANT_DIRECT, cfs_fail_val); + rc = dt_attr_get(env, child, cla); if (rc == -ENOENT) { parent = lfsck_assistant_object_load(env, lfsck, lso); @@ -3968,8 +3970,8 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env, up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos [" - LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, rc); + LPU64"], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc); return rc; } @@ -4002,8 +4004,8 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos [" - LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, rc); + LPU64"], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc); return rc; } @@ -4276,13 +4278,16 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, goto next; } - rc = dt_declare_attr_get(env, cobj); - if (rc != 0) - 
goto next; + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_ASSISTANT_DIRECT)) { + rc = dt_declare_attr_get(env, cobj); + if (rc != 0) + goto next; - rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID); - if (rc != 0) - goto next; + rc = dt_declare_xattr_get(env, cobj, &buf, + XATTR_NAME_FID); + if (rc != 0) + goto next; + } if (lso == NULL) { struct lu_attr *attr = &info->lti_la; @@ -4679,13 +4684,13 @@ static int lfsck_layout_slave_post(const struct lu_env *env, int rc; bool done = false; + down_write(&com->lc_sem); rc = lfsck_layout_lastid_store(env, com); if (rc != 0) result = rc; LASSERT(lfsck->li_out_notify != NULL); - down_write(&com->lc_sem); spin_lock(&lfsck->li_lock); if (!init) lo->ll_pos_last_checkpoint = @@ -5148,12 +5153,14 @@ static void lfsck_layout_slave_quit(const struct lu_env *env, LASSERT(llsd != NULL); + down_write(&com->lc_sem); list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list, lls_list) { list_del_init(&lls->lls_list); lfsck_object_put(env, lls->lls_lastid_obj); OBD_FREE_PTR(lls); } + up_write(&com->lc_sem); spin_lock(&llsd->llsd_lock); while (!list_empty(&llsd->llsd_master_list)) { diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 7158337..1a45e33 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -31,6 +31,7 @@ #define DEBUG_SUBSYSTEM S_LFSCK #include +#include #include #include #include @@ -2497,6 +2498,9 @@ void lfsck_post_generic(const struct lu_env *env, lad->lad_exit = 1; lad->lad_to_post = 1; + CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s post, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result); + wake_up_all(&athread->t_ctl_waitq); l_wait_event(mthread->t_ctl_waitq, (*result > 0 && list_empty(&lad->lad_req_list)) || @@ -2505,6 +2509,9 @@ void lfsck_post_generic(const struct lu_env *env, if (lad->lad_assistant_status < 0) *result = lad->lad_assistant_status; + + CDEBUG(D_LFSCK, "%s: the assistant has done %s post, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), 
lad->lad_name, *result); } int lfsck_double_scan_generic(const struct lu_env *env, @@ -2520,12 +2527,20 @@ int lfsck_double_scan_generic(const struct lu_env *env, else lad->lad_to_double_scan = 1; + CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s double_scan, " + "status %d\n", + lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, status); + wake_up_all(&athread->t_ctl_waitq); l_wait_event(mthread->t_ctl_waitq, lad->lad_in_double_scan || thread_is_stopped(athread), &lwi); + CDEBUG(D_LFSCK, "%s: the assistant has done %s double_scan, " + "status %d\n", lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, + lad->lad_assistant_status); + if (lad->lad_assistant_status < 0) return lad->lad_assistant_status; @@ -3143,6 +3158,28 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, } thread_set_flags(thread, SVC_STOPPING); + + if (lfsck->li_master) { + struct lfsck_component *com; + struct lfsck_assistant_data *lad; + + list_for_each_entry(com, &lfsck->li_list_scan, lc_link) { + lad = com->lc_data; + spin_lock(&lad->lad_lock); + if (lad->lad_task != NULL) + force_sig(SIGINT, lad->lad_task); + spin_unlock(&lad->lad_lock); + } + + list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) { + lad = com->lc_data; + spin_lock(&lad->lad_lock); + if (lad->lad_task != NULL) + force_sig(SIGINT, lad->lad_task); + spin_unlock(&lad->lad_lock); + } + } + spin_unlock(&lfsck->li_lock); wake_up_all(&thread->t_ctl_waitq); diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index bfbed00..fa448bb 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -3931,10 +3931,10 @@ static int lfsck_namespace_checkpoint(const struct lu_env *env, log: CDEBUG(D_LFSCK, "%s: namespace LFSCK checkpoint at the pos ["LPU64 - ", "DFID", "LPX64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, + ", "DFID", "LPX64"], status = %d: rc = %d\n", + lfsck_lfsck2name(lfsck), lfsck->li_pos_current.lp_oit_cookie, 
PFID(&lfsck->li_pos_current.lp_dir_parent), - lfsck->li_pos_current.lp_dir_cookie, rc); + lfsck->li_pos_current.lp_dir_cookie, ns->ln_status, rc); return rc > 0 ? 0 : rc; } diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index 4452f05..45717b1 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -477,7 +477,7 @@ struct obd_export *lustre_find_lwp_by_index(const char *dev, __u32 idx) list_for_each_entry(lwp, &lsi->lsi_lwp_list, obd_lwp_list) { char *ptr = strstr(lwp->obd_name, lwp_name); - if (ptr != NULL) { + if (ptr != NULL && lwp->obd_lwp_export != NULL) { exp = class_export_get(lwp->obd_lwp_export); break; } diff --git a/lustre/osp/osp_trans.c b/lustre/osp/osp_trans.c index 9c2f9140..f6a5bbfe 100644 --- a/lustre/osp/osp_trans.c +++ b/lustre/osp/osp_trans.c @@ -454,6 +454,7 @@ int osp_remote_sync(const struct lu_env *env, struct osp_device *osp, * might be used to retrieve update log during recovery process, so * it will be allowed to send during recovery process */ req->rq_allow_replay = 1; + req->rq_allow_intr = 1; /* Note: some dt index api might return non-zero result here, like * osd_index_ea_lookup, so we should only check rc < 0 here */ diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index e672a30..d665545 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1661,6 +1661,17 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) */ cond_resched(); + /* If the caller requires to allow to be interpreted by force + * and it has really been interpreted, then move the request + * to RQ_PHASE_INTERPRET phase in spite of what the current + * phase is. 
*/ + if (unlikely(req->rq_allow_intr && req->rq_intr)) { + req->rq_status = -EINTR; + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + + GOTO(interpret, req->rq_status); + } + if (req->rq_phase == RQ_PHASE_NEW && ptlrpc_send_new_req(req)) { force_timer_recalc = 1; @@ -2170,7 +2181,8 @@ static void ptlrpc_interrupted_set(void *data) list_entry(tmp, struct ptlrpc_request, rq_set_chain); if (req->rq_phase != RQ_PHASE_RPC && - req->rq_phase != RQ_PHASE_UNREGISTERING) + req->rq_phase != RQ_PHASE_UNREGISTERING && + !req->rq_allow_intr) continue; ptlrpc_mark_interrupted(req); @@ -2272,6 +2284,11 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1), ptlrpc_expired_set, ptlrpc_interrupted_set, set); + else if (set->set_allow_intr) + lwi = LWI_TIMEOUT_INTR_ALL( + cfs_time_seconds(timeout ? timeout : 1), + ptlrpc_expired_set, + ptlrpc_interrupted_set, set); else /* * At least one request is in flight, so no @@ -2286,7 +2303,8 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) /* LU-769 - if we ignored the signal because it was already * pending when we started, we need to handle it now or we risk * it being ignored forever */ - if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr && + if (rc == -ETIMEDOUT && + (!lwi.lwi_allow_intr || set->set_allow_intr) && signal_pending(current)) { sigset_t blocked_sigs = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); @@ -2816,6 +2834,9 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) RETURN(-ENOMEM); } + if (req->rq_allow_intr) + set->set_allow_intr = 1; + /* for distributed debugging */ lustre_msg_set_status(req->rq_reqmsg, current_pid()); diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 10a892e..86a15ed 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -4291,6 +4291,30 @@ test_31h() { } run_test 31h "Repair the corrupted shard's name entry" +test_32() +{ + lfsck_prep 5 5 + umount_client $MOUNT + + #define OBD_FAIL_LFSCK_ASSISTANT_DIRECT 
0x162d + do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x162d + $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!" + + local STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(3) Expect 'scanning-phase1', but got '$STATUS'" + + echo "stop ost1" + stop ost1 > /dev/null || error "(4) Fail to stop OST1!" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + sleep 1 + + echo "stop LFSCK" + $STOP_LFSCK || error "(5) Fail to stop LFSCK!" +} +run_test 32 "stop LFSCK when some OST failed" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} -- 1.8.3.1