From 012834c5e7c7be50ff117cee4ac473d7fee4294d Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Thu, 1 Mar 2018 14:30:36 +0800 Subject: [PATCH] LU-10419 lfsck: skip dead target Do not send LFSCK RPC to dead targets to avoid being blocked. The patch adds warning message when try to send LFSCK RPC on the non-full connection, it is helpful to understand why the LFSCK may be blocked. Signed-off-by: Fan Yong Change-Id: I0599eb961f1aabd58d0de53fd51f25ca1ec8ff34 Reviewed-on: https://review.whamcloud.com/31475 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/lfsck/lfsck_engine.c | 8 ++++---- lustre/lfsck/lfsck_internal.h | 3 ++- lustre/lfsck/lfsck_layout.c | 2 +- lustre/lfsck/lfsck_lib.c | 27 ++++++++++++++++++++------- lustre/lfsck/lfsck_namespace.c | 2 +- 5 files changed, 28 insertions(+), 14 deletions(-) diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index 31b7efa..dac55bc 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -1214,7 +1214,7 @@ again: atomic_inc(&ltd->ltd_ref); laia->laia_ltd = ltd; spin_unlock(&ltds->ltd_lock); - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret_common, laia, LFSCK_QUERY); if (rc != 0) { @@ -1310,7 +1310,7 @@ static int lfsck_assistant_notify_others(const struct lu_env *env, LASSERT(ltd != NULL); laia->laia_ltd = ltd; - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret_common, laia, LFSCK_NOTIFY); if (rc != 0) { @@ -1440,7 +1440,7 @@ again: atomic_inc(&ltd->ltd_ref); laia->laia_ltd = ltd; spin_unlock(&ltds->ltd_lock); - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret_common, laia, LFSCK_NOTIFY); if (rc != 0) { @@ -1510,7 +1510,7 @@ again: atomic_inc(&ltd->ltd_ref); laia->laia_ltd = ltd; spin_unlock(&ltds->ltd_lock); - rc 
= lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret_common, laia, LFSCK_NOTIFY); if (rc != 0) { diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index 1edd518..36f3a0e 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -477,6 +477,7 @@ struct lfsck_tgt_desc { __u32 ltd_layout_gen; __u32 ltd_namespace_gen; unsigned int ltd_dead:1, + ltd_for_ost:1, ltd_retry_start:1, ltd_layout_done:1, ltd_namespace_done:1, @@ -963,7 +964,7 @@ lfsck_assistant_object_load(const struct lu_env *env, int lfsck_async_interpret_common(const struct lu_env *env, struct ptlrpc_request *req, void *args, int rc); -int lfsck_async_request(const struct lu_env *env, struct obd_export *exp, +int lfsck_async_request(const struct lu_env *env, struct lfsck_tgt_desc *ltd, struct lfsck_request *lr, struct ptlrpc_request_set *set, ptlrpc_interpterer_t interpterer, diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 712b299..e329cef 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -291,7 +291,7 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env, continue; laia->laia_ltd = ltd; - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_layout_assistant_sync_failures_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 3de52cd..6f2fa9e 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -2336,7 +2336,7 @@ static int lfsck_stop_notify(const struct lu_env *env, laia->laia_ltd = ltd; laia->laia_lr = lr; - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret_common, laia, LFSCK_NOTIFY); if (rc != 0) { @@ -2374,18 +2374,22 @@ static int lfsck_async_interpret(const struct lu_env *env, return 0; } -int 
lfsck_async_request(const struct lu_env *env, struct obd_export *exp, +int lfsck_async_request(const struct lu_env *env, struct lfsck_tgt_desc *ltd, struct lfsck_request *lr, struct ptlrpc_request_set *set, ptlrpc_interpterer_t interpreter, void *args, int request) { + struct obd_import *imp = class_exp2cliimp(ltd->ltd_exp); struct lfsck_async_interpret_args *laia; struct ptlrpc_request *req; struct lfsck_request *tmp; struct req_format *format; int rc; + if (unlikely(ltd->ltd_dead)) + return -ENODEV; + switch (request) { case LFSCK_NOTIFY: format = &RQF_LFSCK_NOTIFY; @@ -2395,11 +2399,11 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp, break; default: CDEBUG(D_LFSCK, "%s: unknown async request %d: rc = %d\n", - exp->exp_obd->obd_name, request, -EINVAL); + imp->imp_obd->obd_name, request, -EINVAL); return -EINVAL; } - req = ptlrpc_request_alloc(class_exp2cliimp(exp), format); + req = ptlrpc_request_alloc(imp, format); if (req == NULL) return -ENOMEM; @@ -2410,6 +2414,13 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp, return rc; } + if (unlikely(imp->imp_state != LUSTRE_IMP_FULL)) + LCONSOLE_INFO("%s (%d): sending async LFSCK RPC (%u) to %s%4x " + "on non-full connection (%u), may be blocked.\n", + imp->imp_obd->obd_name, current_pid(), + lr->lr_event, ltd->ltd_for_ost ? 
"OST" : "MDT", + ltd->ltd_index, imp->imp_state); + tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); *tmp = *lr; ptlrpc_request_set_replen(req); @@ -2461,7 +2472,7 @@ again: laia->laia_ltd = ltd; up_read(&ltds->ltd_rw_sem); - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret_common, laia, LFSCK_QUERY); if (rc != 0) { @@ -2924,7 +2935,7 @@ static int lfsck_stop_all(const struct lu_env *env, LASSERT(ltd != NULL); laia->laia_ltd = ltd; - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { @@ -3008,7 +3019,7 @@ again: ltd->ltd_layout_done = 0; ltd->ltd_namespace_done = 0; ltd->ltd_synced_failures = 0; - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { @@ -3841,6 +3852,8 @@ int lfsck_add_target(const struct lu_env *env, struct dt_device *key, ltd->ltd_tgt = tgt; ltd->ltd_key = key; ltd->ltd_exp = exp; + if (for_ost) + ltd->ltd_for_ost = 1; INIT_LIST_HEAD(&ltd->ltd_orphan_list); INIT_LIST_HEAD(&ltd->ltd_layout_list); INIT_LIST_HEAD(&ltd->ltd_layout_phase_list); diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index b613511..29a901d 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -6537,7 +6537,7 @@ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env, continue; laia->laia_ltd = ltd; - rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + rc = lfsck_async_request(env, ltd, lr, set, lfsck_namespace_assistant_sync_failures_interpret, laia, LFSCK_NOTIFY); if (rc != 0) -- 1.8.3.1