Whamcloud - gitweb
LU-8407 recovery: more clear message about recovery failure 59/21759/4
author Fan Yong <fan.yong@intel.com>
Fri, 24 Jun 2016 04:21:07 +0000 (12:21 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 8 Sep 2016 02:06:04 +0000 (02:06 +0000)
Currently, DNE recovery depends on the update logs on the MDTs.
If the update logs cannot be fetched from some MDT(s), the recovery
cannot proceed. Unlike a client-side recovery failure, a cross-MDT
recovery failure may cause namespace inconsistency. Because we do
not want to export an inconsistent namespace to clients, we make
the recovery wait (rather than abort on timeout) until the related
update logs become available.

So if some MDT is not up or not mounted, the recovery on the other
MDTs will hang. As time goes on, client (re)connections trigger
warning messages on the MDTs about the hung recovery, but these
messages do not clearly describe what happened.

This patch adds a callback interface to target_distribute_txn_data,
called 'tdtd_show_update_logs_retrievers'. It allows users to check
which MDTs are still fetching update logs, so that the admin can
examine the related MDTs in detail when recovery runs into trouble.

This patch also introduces a new recovery status, "WAITING", for the
case where the update logs of some MDT(s) are not ready. In that
case, the indices of the non-ready MDTs and the time waited are shown.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: If5ed4487fe1e6d94f02479d83f6a187d6427b3a7
Reviewed-on: http://review.whamcloud.com/21759
Tested-by: Jenkins
Reviewed-by: wangdi <di.wang@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lu_target.h
lustre/ldlm/ldlm_lib.c
lustre/lod/lod_dev.c
lustre/obdclass/lprocfs_status_server.c

index 10b045b..e8f09e7 100644 (file)
@@ -83,6 +83,8 @@ struct target_distribute_txn_data;
 typedef int (*distribute_txn_replay_handler_t)(struct lu_env *env,
                                       struct target_distribute_txn_data *tdtd,
                                       struct distribute_txn_replay_req *dtrq);
+typedef char *(*target_show_update_logs_retrievers_t)(void *data, int *size,
+                                                     int *count);
 struct target_distribute_txn_data {
        /* Distribution ID is used to identify updates log on different
         * MDTs for one operation */
@@ -113,6 +115,9 @@ struct target_distribute_txn_data {
        /* Manage the llog recovery threads */
        atomic_t                tdtd_recovery_threads_count;
        wait_queue_head_t       tdtd_recovery_threads_waitq;
+       target_show_update_logs_retrievers_t
+                               tdtd_show_update_logs_retrievers;
+       void                    *tdtd_show_retrievers_cbdata;
 };
 
 struct lu_target {
index e31274b..5a1e716 100644 (file)
@@ -769,14 +769,36 @@ static int target_handle_reconnect(struct lustre_handle *conn,
        now = cfs_time_current();
        deadline = cfs_timer_deadline(&target->obd_recovery_timer);
        if (cfs_time_before(now, deadline)) {
+               struct target_distribute_txn_data *tdtd =
+                                       class_exp2tgt(exp)->lut_tdtd;
+               int size = 0;
+               int count = 0;
+               char *buf = NULL;
+
                timeout = cfs_duration_sec(cfs_time_sub(deadline, now));
-               LCONSOLE_WARN("%s: Client %s (at %s) reconnecting,"
-                       " waiting for %d clients in recovery for"
-                       " %d:%.02d\n", target->obd_name,
-                       obd_uuid2str(&exp->exp_client_uuid),
-                       obd_export_nid2str(exp),
-                       target->obd_max_recoverable_clients,
-                       timeout / 60, timeout % 60);
+               if (tdtd && tdtd->tdtd_show_update_logs_retrievers)
+                       buf = tdtd->tdtd_show_update_logs_retrievers(
+                               tdtd->tdtd_show_retrievers_cbdata,
+                               &size, &count);
+
+               if (count > 0)
+                       LCONSOLE_WARN("%s: Recovery already passed deadline "
+                                     "%d:%.02d. It is due to DNE recovery "
+                                     "failed/stuck on the %d MDT(s):%s. "
+                                     "Please wait until all MDTs recovered "
+                                     "or abort the recovery by force.\n",
+                                     target->obd_name, timeout / 60,
+                                     timeout % 60, count,
+                                     buf ? buf : "unknown (not enough RAM)");
+               else
+                       LCONSOLE_WARN("%s: Recovery already passed deadline "
+                                     "%d:%.02d. If you do not want to wait "
+                                     "more, please abort the recovery by "
+                                     "force.\n", target->obd_name,
+                                     timeout / 60, timeout % 60);
+
+               if (buf != NULL)
+                       OBD_FREE(buf, size);
        } else {
                timeout = cfs_duration_sec(cfs_time_sub(now, deadline));
                LCONSOLE_WARN("%s: Recovery already passed deadline"
index c3d67e4..109c6c6 100644 (file)
@@ -739,6 +739,91 @@ static void lod_sub_fini_all_llogs(const struct lu_env *env,
         lod_putref(lod, ltd);
 }
 
+/**
+ * List the MDTs whose update logs have not been retrieved yet
+ *
+ * Build a string of " %04x" formatted MDT indices: the local target first
+ * (if lod_child_got_update_log is not set), then every target in the MDT
+ * bitmap whose ltd_got_update_log is not set.
+ *
+ * The buffer is sized from tdtd_recovery_threads_count, which may be smaller
+ * than the number of non-ready targets found afterwards. In that case the
+ * list stops at the last entry that fits completely, so that neither a
+ * partial index is shown nor a truncated entry is counted.
+ *
+ * \param[in] data     the lod_device registered as the callback data
+ * \param[out] size    size computed for the buffer in bytes (0 if there is
+ *                     no recovery thread); the caller passes it to
+ *                     OBD_FREE() when a buffer is returned
+ * \param[out] count   number of MDTs listed in the returned buffer; if the
+ *                     allocation fails it is left at the recovery threads
+ *                     count
+ *
+ * \retval             the allocated string, to be freed by the caller
+ * \retval             NULL if there is no recovery thread or no memory
+ */
+static char *lod_show_update_logs_retrievers(void *data, int *size, int *count)
+{
+       struct lod_device       *lod = (struct lod_device *)data;
+       struct lu_target        *lut = lod2lu_dev(lod)->ld_site->ls_tgt;
+       struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
+       struct lod_tgt_desc     *tgt = NULL;
+       char                    *buf;
+       int                      len = 0;
+       int                      rc;
+       int                      i;
+
+       *count = atomic_read(&lut->lut_tdtd->tdtd_recovery_threads_count);
+       if (*count == 0) {
+               *size = 0;
+               return NULL;
+       }
+
+       /* " %04x" takes 5 bytes per entry, plus the terminating NUL */
+       *size = 5 * *count + 1;
+       OBD_ALLOC(buf, *size);
+       if (buf == NULL)
+               return NULL;
+
+       *count = 0;
+       memset(buf, 0, *size);
+
+       if (!lod->lod_child_got_update_log) {
+               rc = lodname2mdt_index(lod2obd(lod)->obd_name, &i);
+               LASSERTF(rc == 0, "Fail to parse target index: rc = %d\n", rc);
+
+               rc = snprintf(buf + len, *size - len, " %04x", i);
+               LASSERT(rc > 0);
+
+               len += rc;
+               (*count)++;
+       }
+
+       cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
+               tgt = LTD_TGT(ltd, i);
+               if (!tgt->ltd_got_update_log) {
+                       /* no room left for another entry */
+                       if (len >= *size - 1)
+                               break;
+
+                       rc = snprintf(buf + len, *size - len, " %04x", i);
+                       if (unlikely(rc <= 0))
+                               break;
+
+                       /* snprintf() returns the untruncated length: drop
+                        * an entry that did not fit instead of counting it */
+                       if (unlikely(rc >= *size - len)) {
+                               buf[len] = '\0';
+                               break;
+                       }
+
+                       len += rc;
+                       (*count)++;
+               }
+       }
+
+       return buf;
+}
+
 /**
  * Prepare distribute txn
  *
@@ -775,6 +860,10 @@ static int lod_prepare_distribute_txn(const struct lu_env *env,
                RETURN(rc);
        }
 
+       tdtd->tdtd_show_update_logs_retrievers =
+               lod_show_update_logs_retrievers;
+       tdtd->tdtd_show_retrievers_cbdata = lod;
+
        lut->lut_tdtd = tdtd;
 
        RETURN(0);
index dc3e95c..7808d09 100644 (file)
@@ -564,6 +564,7 @@ EXPORT_SYMBOL(lprocfs_hash_seq_show);
 int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
+       struct target_distribute_txn_data *tdtd;
 
        LASSERT(obd != NULL);
 
@@ -597,6 +598,33 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                goto out;
        }
 
+       tdtd = obd->u.obt.obt_lut->lut_tdtd;
+       if (tdtd && tdtd->tdtd_show_update_logs_retrievers) {
+               char *buf;
+               int size = 0;
+               int count = 0;
+
+               buf = tdtd->tdtd_show_update_logs_retrievers(
+                       tdtd->tdtd_show_retrievers_cbdata,
+                       &size, &count);
+               if (count > 0) {
+                       seq_printf(m, "WAITING\n");
+                       seq_printf(m, "non-ready MDTs: %s\n",
+                                  buf ? buf : "unknown (not enough RAM)");
+                       seq_printf(m, "recovery_start: %lu\n",
+                                  obd->obd_recovery_start);
+                       seq_printf(m, "time_waited: %lu\n",
+                                  cfs_time_current_sec() -
+                                  obd->obd_recovery_start);
+               }
+
+               if (buf != NULL)
+                       OBD_FREE(buf, size);
+
+               if (likely(count > 0))
+                       return 0;
+       }
+
        seq_printf(m, "RECOVERING\n");
        seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
        seq_printf(m, "time_remaining: %lu\n",