LU-8649 recovery: print some useful messages in recovery

author Emoly Liu <emoly.liu@intel.com>

Mon, 25 Dec 2017 03:34:36 +0000 (11:34 +0800)

committer Oleg Drokin <oleg.drokin@intel.com>

Tue, 9 Jan 2018 05:35:08 +0000 (05:35 +0000)
author Emoly Liu <emoly.liu@intel.com>
Mon, 25 Dec 2017 03:34:36 +0000 (11:34 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 9 Jan 2018 05:35:08 +0000 (05:35 +0000)
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index 5e8a03f..7b40b98 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1966,7 +1966,18 @@ static int target_recovery_overseer(struct lu_target *lut,
  {
         struct obd_device       *obd = lut->lut_obd;
         struct target_distribute_txn_data *tdtd;
+       time64_t last = 0;
+       time64_t now;
  repeat:
+       if (obd->obd_recovering && obd->obd_recovery_start == 0) {
+               now = ktime_get_seconds();
+               if (now - last > 600) {
+                       LCONSOLE_INFO("%s: in recovery but waiting for "
+                                     "the first client to connect\n",
+                                     obd->obd_name);
+                       last = now;
+               }
+       }
         if (obd->obd_recovery_start != 0 && ktime_get_real_seconds() >=
               (obd->obd_recovery_start + obd->obd_recovery_time_hard)) {
                 __u64 next_update_transno = 0;
diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c

index bade9df..0e3e998 100644 (file)
--- a/lustre/obdclass/lprocfs_status_server.c
+++ b/lustre/obdclass/lprocfs_status_server.c
@@ -624,7 +624,13 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                         OBD_FREE(buf, size);
  
                 if (likely(count > 0))
-                       return 0;
+                       goto out;
+       }
+
+       /* recovery won't start until the clients connect */
+       if (obd->obd_recovery_start == 0) {
+               seq_printf(m, "WAITING_FOR_CLIENTS\n");
+               goto out;
         }
  
         seq_printf(m, "RECOVERING\n");
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index 6afea7e..8d4c648 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -2116,7 +2116,9 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
  
  put_conn:
         if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) {
-               DEBUG_REQ(D_WARNING, request, "Request took longer than estimated (%lld:%llds); client may timeout.",
+               DEBUG_REQ(D_WARNING, request,
+                         "Request took longer than estimated (%lld:%llds); "
+                         "client may timeout.",
                           request->rq_deadline -
                           request->rq_arrival_time.tv_sec,
                           ktime_get_real_seconds() - request->rq_deadline);
@@ -2125,40 +2127,42 @@ put_conn:
         work_end = ktime_get_real();
         timediff_usecs = ktime_us_delta(work_end, work_start);
         arrived_usecs = ktime_us_delta(work_end, arrived);
-       CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request procesed in %lldus (%lldus total) trans %llu rc %d/%d\n",
-               current_comm(),
-               (request->rq_export ?
-                (char *)request->rq_export->exp_client_uuid.uuid : "0"),
-                (request->rq_export ?
-                atomic_read(&request->rq_export->exp_refcount) : -99),
-                lustre_msg_get_status(request->rq_reqmsg),
-                request->rq_xid,
-                libcfs_id2str(request->rq_peer),
-                lustre_msg_get_opc(request->rq_reqmsg),
-               timediff_usecs,
-               arrived_usecs,
-                (request->rq_repmsg ?
-                 lustre_msg_get_transno(request->rq_repmsg) :
-                 request->rq_transno),
-                request->rq_status,
-                (request->rq_repmsg ?
-                 lustre_msg_get_status(request->rq_repmsg) : -999));
-        if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) {
-                __u32 op = lustre_msg_get_opc(request->rq_reqmsg);
-                int opc = opcode_offset(op);
-                if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) {
-                        LASSERT(opc < LUSTRE_MAX_OPCODES);
-                        lprocfs_counter_add(svc->srv_stats,
-                                            opc + EXTRA_MAX_OPCODES,
+       CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc "
+              "%s:%s+%d:%d:x%llu:%s:%d Request processed in %lldus "
+              "(%lldus total) trans %llu rc %d/%d\n",
+              current_comm(),
+              (request->rq_export ?
+              (char *)request->rq_export->exp_client_uuid.uuid : "0"),
+              (request->rq_export ?
+              atomic_read(&request->rq_export->exp_refcount) : -99),
+              lustre_msg_get_status(request->rq_reqmsg),
+              request->rq_xid,
+              libcfs_id2str(request->rq_peer),
+              lustre_msg_get_opc(request->rq_reqmsg),
+              timediff_usecs,
+              arrived_usecs,
+              (request->rq_repmsg ?
+              lustre_msg_get_transno(request->rq_repmsg) :
+              request->rq_transno),
+              request->rq_status,
+              (request->rq_repmsg ?
+              lustre_msg_get_status(request->rq_repmsg) : -999));
+       if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) {
+               __u32 op = lustre_msg_get_opc(request->rq_reqmsg);
+               int opc = opcode_offset(op);
+               if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) {
+                       LASSERT(opc < LUSTRE_MAX_OPCODES);
+                       lprocfs_counter_add(svc->srv_stats,
+                                           opc + EXTRA_MAX_OPCODES,
                                             timediff_usecs);
-                }
-        }
-        if (unlikely(request->rq_early_count)) {
-                DEBUG_REQ(D_ADAPTTO, request,
+               }
+       }
+       if (unlikely(request->rq_early_count)) {
+               DEBUG_REQ(D_ADAPTTO, request,
                           "sent %d early replies before finishing in %llds",
-                          request->rq_early_count,
+                         request->rq_early_count,
                           arrived_usecs / USEC_PER_SEC);
-        }
+       }
  
         ptlrpc_server_finish_active_request(svcpt, request);
author	Emoly Liu <emoly.liu@intel.com>
	Mon, 25 Dec 2017 03:34:36 +0000 (11:34 +0800)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Tue, 9 Jan 2018 05:35:08 +0000 (05:35 +0000)
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/obdclass/lprocfs_status_server.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history