Currently, DNE recovery depends on the update logs on the MDTs.
If the update logs cannot be fetched from some MDT(s), the recovery
cannot proceed. Unlike a client-side recovery failure, a cross-MDT
recovery failure may cause namespace inconsistency.
Because we do not want to export an inconsistent namespace to
clients, we make the recovery wait (rather than abort on timeout)
until the related update logs become available.
So if some MDT is not up or not mounted, the recovery on the other
MDTs will hang. As time goes on, client (re)connections will
trigger warning messages on the MDTs about the hung recovery,
but such messages do not clearly describe what happened.
This patch adds a callback interface to target_distribute_txn_data,
called 'tdtd_show_update_logs_retrievers'. It allows users to
check which MDTs are still fetching update logs, so the admin
can examine the related MDTs in detail when recovery gets stuck.
This patch also introduces a new recovery status, "WAITING", for the
case where update logs are not ready for some MDT(s). In that case,
the indices of the non-ready MDTs and the time waited will be shown.
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: If5ed4487fe1e6d94f02479d83f6a187d6427b3a7
Reviewed-on: http://review.whamcloud.com/21759
Tested-by: Jenkins
Reviewed-by: wangdi <di.wang@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env,
struct target_distribute_txn_data *tdtd,
struct distribute_txn_replay_req *dtrq);
+typedef char *(*target_show_update_logs_retrievers_t)(void *data, int *size,
+ int *count);
struct target_distribute_txn_data {
/* Distribution ID is used to identify updates log on different
* MDTs for one operation */
/* Manage the llog recovery threads */
atomic_t tdtd_recovery_threads_count;
wait_queue_head_t tdtd_recovery_threads_waitq;
+ target_show_update_logs_retrievers_t
+ tdtd_show_update_logs_retrievers;
+ void *tdtd_show_retrievers_cbdata;
};
struct lu_target {
now = cfs_time_current();
deadline = cfs_timer_deadline(&target->obd_recovery_timer);
if (cfs_time_before(now, deadline)) {
+ struct target_distribute_txn_data *tdtd =
+ class_exp2tgt(exp)->lut_tdtd;
+ int size = 0;
+ int count = 0;
+ char *buf = NULL;
+
timeout = cfs_duration_sec(cfs_time_sub(deadline, now));
- LCONSOLE_WARN("%s: Client %s (at %s) reconnecting,"
- " waiting for %d clients in recovery for"
- " %d:%.02d\n", target->obd_name,
- obd_uuid2str(&exp->exp_client_uuid),
- obd_export_nid2str(exp),
- target->obd_max_recoverable_clients,
- timeout / 60, timeout % 60);
+ if (tdtd && tdtd->tdtd_show_update_logs_retrievers)
+ buf = tdtd->tdtd_show_update_logs_retrievers(
+ tdtd->tdtd_show_retrievers_cbdata,
+ &size, &count);
+
+ if (count > 0)
+ LCONSOLE_WARN("%s: Recovery already passed deadline "
+ "%d:%.02d. It is due to DNE recovery "
+ "failed/stuck on the %d MDT(s):%s. "
+ "Please wait until all MDTs recovered "
+ "or abort the recovery by force.\n",
+ target->obd_name, timeout / 60,
+ timeout % 60, count,
+ buf ? buf : "unknown (not enough RAM)");
+ else
+ LCONSOLE_WARN("%s: Recovery already passed deadline "
+ "%d:%.02d. If you do not want to wait "
+ "more, please abort the recovery by "
+ "force.\n", target->obd_name,
+ timeout / 60, timeout % 60);
+
+ if (buf != NULL)
+ OBD_FREE(buf, size);
} else {
timeout = cfs_duration_sec(cfs_time_sub(now, deadline));
LCONSOLE_WARN("%s: Recovery already passed deadline"
lod_putref(lod, ltd);
}
+/**
+ * List the MDT indices whose update logs have not been retrieved yet.
+ *
+ * Builds a string of " %04x"-formatted indices: first the local target
+ * (when lod_child_got_update_log is not set), then every target in the
+ * MDT descriptor bitmap whose ltd_got_update_log is not set.
+ *
+ * \param[in]  data	the lod_device, passed as an opaque callback cookie
+ * \param[out] size	bytes allocated for the returned buffer; the caller
+ *			must pass this value to OBD_FREE(). Set to 0 when
+ *			nothing is allocated because no thread is counted.
+ * \param[out] count	number of indices written into the buffer. If the
+ *			allocation fails it keeps the value read from
+ *			tdtd_recovery_threads_count, so the caller can still
+ *			tell that something is pending.
+ *
+ * \retval		buffer holding the NUL-terminated list, owned by
+ *			the caller
+ * \retval		NULL if tdtd_recovery_threads_count is zero or the
+ *			allocation failed
+ */
+static char *lod_show_update_logs_retrievers(void *data, int *size, int *count)
+{
+	struct lod_device *lod = (struct lod_device *)data;
+	struct lu_target *lut = lod2lu_dev(lod)->ld_site->ls_tgt;
+	struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
+	struct lod_tgt_desc *tgt = NULL;
+	char *buf;
+	int len = 0;
+	int rc;
+	int i;
+
+	*count = atomic_read(&lut->lut_tdtd->tdtd_recovery_threads_count);
+	if (*count == 0) {
+		*size = 0;
+		return NULL;
+	}
+
+	/* 5 bytes per " %04x" entry plus the trailing NUL. This is only an
+	 * estimate: nothing here guarantees that the number of non-ready
+	 * targets found below matches the thread count sampled above, so
+	 * every write into the buffer is bounds-checked. */
+	*size = 5 * *count + 1;
+	OBD_ALLOC(buf, *size);
+	if (buf == NULL)
+		return NULL;
+
+	*count = 0;
+	memset(buf, 0, *size);
+
+	if (!lod->lod_child_got_update_log) {
+		rc = lodname2mdt_index(lod2obd(lod)->obd_name, &i);
+		LASSERTF(rc == 0, "Fail to parse target index: rc = %d\n", rc);
+
+		rc = snprintf(buf + len, *size - len, " %04x", i);
+		LASSERT(rc > 0);
+		/* snprintf() reports the length it wanted to write, so
+		 * rc >= remaining space means the entry was truncated.
+		 * Drop the partial entry instead of advancing past the
+		 * end of the buffer. */
+		if (unlikely(rc >= *size - len)) {
+			buf[len] = '\0';
+			return buf;
+		}
+
+		len += rc;
+		(*count)++;
+	}
+
+	cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
+		tgt = LTD_TGT(ltd, i);
+		if (!tgt->ltd_got_update_log) {
+			rc = snprintf(buf + len, *size - len, " %04x", i);
+			/* Stop on error or when the entry does not fit;
+			 * otherwise len could exceed *size and the next
+			 * "*size - len" would go negative. */
+			if (unlikely(rc <= 0 || rc >= *size - len)) {
+				buf[len] = '\0';
+				break;
+			}
+
+			len += rc;
+			(*count)++;
+		}
+	}
+
+	return buf;
+}
+
+
/**
* Prepare distribute txn
*
RETURN(rc);
}
+ tdtd->tdtd_show_update_logs_retrievers =
+ lod_show_update_logs_retrievers;
+ tdtd->tdtd_show_retrievers_cbdata = lod;
+
lut->lut_tdtd = tdtd;
RETURN(0);
int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
{
struct obd_device *obd = m->private;
+ struct target_distribute_txn_data *tdtd;
LASSERT(obd != NULL);
goto out;
}
+ tdtd = obd->u.obt.obt_lut->lut_tdtd;
+ if (tdtd && tdtd->tdtd_show_update_logs_retrievers) {
+ char *buf;
+ int size = 0;
+ int count = 0;
+
+ buf = tdtd->tdtd_show_update_logs_retrievers(
+ tdtd->tdtd_show_retrievers_cbdata,
+ &size, &count);
+ if (count > 0) {
+ seq_printf(m, "WAITING\n");
+ seq_printf(m, "non-ready MDTs: %s\n",
+ buf ? buf : "unknown (not enough RAM)");
+ seq_printf(m, "recovery_start: %lu\n",
+ obd->obd_recovery_start);
+ seq_printf(m, "time_waited: %lu\n",
+ cfs_time_current_sec() -
+ obd->obd_recovery_start);
+ }
+
+ if (buf != NULL)
+ OBD_FREE(buf, size);
+
+ if (likely(count > 0))
+ return 0;
+ }
+
seq_printf(m, "RECOVERING\n");
seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
seq_printf(m, "time_remaining: %lu\n",