From 6dd0be19a97945db5da61ecdf845087b936805fa Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 24 Jun 2016 12:21:07 +0800 Subject: [PATCH] LU-8407 recovery: more clear message about recovery failure Currently, DNE recovery depends on the update logs on the MDTs. If the update logs cannot be fetched from some MDT(s), then the recovery cannot go ahead. Unlike a client-side recovery failure, a cross-MDT recovery failure may cause namespace inconsistency. Because we do not want to export an inconsistent namespace to clients, we make the recovery wait (rather than abort because of timeout) until the related update logs are available. So if some MDT is not up or not mounted, then the recovery on the other MDTs will hang there. As time goes on, client (re)connections will trigger warning messages on the MDTs about the hung recovery. But such messages do not clearly describe what happened. This patch adds a callback interface in target_distribute_txn_data, called 'tdtd_show_update_logs_retrievers'. It allows users to check which MDTs are still fetching update logs. Then the admin can check the related MDTs in detail when hitting recovery trouble. This patch also introduces a new recovery status "WAITING" for the case where the update logs are not ready for some MDT(s). In that case, the indices of the non-ready MDTs and the time waited will be shown. 
Signed-off-by: Fan Yong Change-Id: If5ed4487fe1e6d94f02479d83f6a187d6427b3a7 Reviewed-on: http://review.whamcloud.com/21759 Tested-by: Jenkins Reviewed-by: wangdi Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- lustre/include/lu_target.h | 5 +++ lustre/ldlm/ldlm_lib.c | 36 ++++++++++++++++----- lustre/lod/lod_dev.c | 55 +++++++++++++++++++++++++++++++++ lustre/obdclass/lprocfs_status_server.c | 28 +++++++++++++++++ 4 files changed, 117 insertions(+), 7 deletions(-) diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 10b045b..e8f09e7 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -83,6 +83,8 @@ struct target_distribute_txn_data; typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env, struct target_distribute_txn_data *tdtd, struct distribute_txn_replay_req *dtrq); +typedef char *(*target_show_update_logs_retrievers_t)(void *data, int *size, + int *count); struct target_distribute_txn_data { /* Distribution ID is used to identify updates log on different * MDTs for one operation */ @@ -113,6 +115,9 @@ struct target_distribute_txn_data { /* Manage the llog recovery threads */ atomic_t tdtd_recovery_threads_count; wait_queue_head_t tdtd_recovery_threads_waitq; + target_show_update_logs_retrievers_t + tdtd_show_update_logs_retrievers; + void *tdtd_show_retrievers_cbdata; }; struct lu_target { diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index e31274b..5a1e716 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -769,14 +769,36 @@ static int target_handle_reconnect(struct lustre_handle *conn, now = cfs_time_current(); deadline = cfs_timer_deadline(&target->obd_recovery_timer); if (cfs_time_before(now, deadline)) { + struct target_distribute_txn_data *tdtd = + class_exp2tgt(exp)->lut_tdtd; + int size = 0; + int count = 0; + char *buf = NULL; + timeout = cfs_duration_sec(cfs_time_sub(deadline, now)); - LCONSOLE_WARN("%s: Client %s (at %s) reconnecting," - " 
waiting for %d clients in recovery for" - " %d:%.02d\n", target->obd_name, - obd_uuid2str(&exp->exp_client_uuid), - obd_export_nid2str(exp), - target->obd_max_recoverable_clients, - timeout / 60, timeout % 60); + if (tdtd && tdtd->tdtd_show_update_logs_retrievers) + buf = tdtd->tdtd_show_update_logs_retrievers( + tdtd->tdtd_show_retrievers_cbdata, + &size, &count); + + if (count > 0) + LCONSOLE_WARN("%s: Recovery already passed deadline " + "%d:%.02d. It is due to DNE recovery " + "failed/stuck on the %d MDT(s):%s. " + "Please wait until all MDTs recovered " + "or abort the recovery by force.\n", + target->obd_name, timeout / 60, + timeout % 60, count, + buf ? buf : "unknown (not enough RAM)"); + else + LCONSOLE_WARN("%s: Recovery already passed deadline " + "%d:%.02d. If you do not want to wait " + "more, please abort the recovery by " + "force.\n", target->obd_name, + timeout / 60, timeout % 60); + + if (buf != NULL) + OBD_FREE(buf, size); } else { timeout = cfs_duration_sec(cfs_time_sub(now, deadline)); LCONSOLE_WARN("%s: Recovery already passed deadline" diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index c3d67e4..109c6c6 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -739,6 +739,57 @@ static void lod_sub_fini_all_llogs(const struct lu_env *env, lod_putref(lod, ltd); } +static char *lod_show_update_logs_retrievers(void *data, int *size, int *count) +{ + struct lod_device *lod = (struct lod_device *)data; + struct lu_target *lut = lod2lu_dev(lod)->ld_site->ls_tgt; + struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; + struct lod_tgt_desc *tgt = NULL; + char *buf; + int len = 0; + int rc; + int i; + + *count = atomic_read(&lut->lut_tdtd->tdtd_recovery_threads_count); + if (*count == 0) { + *size = 0; + return NULL; + } + + *size = 5 * *count + 1; + OBD_ALLOC(buf, *size); + if (buf == NULL) + return NULL; + + *count = 0; + memset(buf, 0, *size); + + if (!lod->lod_child_got_update_log) { + rc = lodname2mdt_index(lod2obd(lod)->obd_name, 
&i); + LASSERTF(rc == 0, "Fail to parse target index: rc = %d\n", rc); + + rc = snprintf(buf + len, *size - len, " %04x", i); + LASSERT(rc > 0); + + len += rc; + (*count)++; + } + + cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { + tgt = LTD_TGT(ltd, i); + if (!tgt->ltd_got_update_log) { + rc = snprintf(buf + len, *size - len, " %04x", i); + if (unlikely(rc <= 0)) + break; + + len += rc; + (*count)++; + } + } + + return buf; +} + /** * Prepare distribute txn * @@ -775,6 +826,10 @@ static int lod_prepare_distribute_txn(const struct lu_env *env, RETURN(rc); } + tdtd->tdtd_show_update_logs_retrievers = + lod_show_update_logs_retrievers; + tdtd->tdtd_show_retrievers_cbdata = lod; + lut->lut_tdtd = tdtd; RETURN(0); diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c index dc3e95c..7808d09b 100644 --- a/lustre/obdclass/lprocfs_status_server.c +++ b/lustre/obdclass/lprocfs_status_server.c @@ -564,6 +564,7 @@ EXPORT_SYMBOL(lprocfs_hash_seq_show); int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = m->private; + struct target_distribute_txn_data *tdtd; LASSERT(obd != NULL); @@ -597,6 +598,33 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data) goto out; } + tdtd = obd->u.obt.obt_lut->lut_tdtd; + if (tdtd && tdtd->tdtd_show_update_logs_retrievers) { + char *buf; + int size = 0; + int count = 0; + + buf = tdtd->tdtd_show_update_logs_retrievers( + tdtd->tdtd_show_retrievers_cbdata, + &size, &count); + if (count > 0) { + seq_printf(m, "WAITING\n"); + seq_printf(m, "non-ready MDTs: %s\n", + buf ? 
buf : "unknown (not enough RAM)"); + seq_printf(m, "recovery_start: %lu\n", + obd->obd_recovery_start); + seq_printf(m, "time_waited: %lu\n", + cfs_time_current_sec() - + obd->obd_recovery_start); + } + + if (buf != NULL) + OBD_FREE(buf, size); + + if (likely(count > 0)) + return 0; + } + seq_printf(m, "RECOVERING\n"); seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start); seq_printf(m, "time_remaining: %lu\n", -- 1.8.3.1