Whamcloud - gitweb
b=13537
authornathan <nathan>
Tue, 16 Oct 2007 18:40:43 +0000 (18:40 +0000)
committernathan <nathan>
Tue, 16 Oct 2007 18:40:43 +0000 (18:40 +0000)
i=adilger
i=johann
extend reconnect timer as new clients reconnect
fix recovery_status proc file
consistently use xNNN for xids for easier debugging

lustre/include/obd_support.h
lustre/ldlm/ldlm_lib.c
lustre/mds/mds_fs.c
lustre/obdclass/genops.c
lustre/obdclass/lprocfs_status.c
lustre/obdfilter/filter.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/service.c

index f0a0578..2c2a04e 100644 (file)
@@ -52,7 +52,7 @@ extern unsigned int obd_alloc_fail_rate;
 #define LDLM_TIMEOUT_DEFAULT 20
 /* Time to wait for all clients to reconnect during recovery */
 /* Should be very conservative; must catch the first reconnect after reboot */
-#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2)
+#define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */
 /* Change recovery-small 26b time if you change this */
 #define PING_INTERVAL max(obd_timeout / 4, 1U)
 /* Client may skip 1 ping; wait for 2.5 */
index 8b60e46..2a14fb7 100644 (file)
@@ -751,20 +751,25 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                 OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout);
         }
 
-        /* We want to handle EALREADY but *not* -EALREADY from
-         * target_handle_reconnect(), return reconnection state in a flag */
-        if (rc == EALREADY) {
-                lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
-                rc = 0;
-        } else if (rc) {
+        if (rc < 0)
                 GOTO(out, rc);
-        }
 
         /* Tell the client if we're in recovery. */
-        /* If this is the first client, start the recovery timer */
         if (target->obd_recovering) {
                 lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
-                target_start_recovery_timer(target, handler, req);
+                /* If this is the first time a client connects,
+                   reset the recovery timer */
+                if (rc == 0)
+                        target_start_recovery_timer(target, handler, req);
+        }
+
+        /* We want to handle EALREADY but *not* -EALREADY from
+         * target_handle_reconnect(), return reconnection state in a flag */
+        if (rc == EALREADY) {
+                lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
+                rc = 0;
+        } else {
+                LASSERT(rc == 0);
         }
 
         /* Tell the client if we support replayable requests */
@@ -1040,8 +1045,8 @@ void target_cleanup_recovery(struct obd_device *obd)
 void target_abort_recovery(void *data)
 {
         struct obd_device *obd = data;
-
         ENTRY;
+
         spin_lock_bh(&obd->obd_processing_task_lock);
         if (!obd->obd_recovering) {
                 spin_unlock_bh(&obd->obd_processing_task_lock);
@@ -1049,12 +1054,14 @@ void target_abort_recovery(void *data)
                 return;
         }
         obd->obd_recovering = obd->obd_abort_recovery = 0;
-        obd->obd_recoverable_clients = 0;
         target_cancel_recovery_timer(obd);
         spin_unlock_bh(&obd->obd_processing_task_lock);
 
-        LCONSOLE_WARN("%s: recovery period over; disconnecting unfinished "
-                      "clients.\n", obd->obd_name);
+        LCONSOLE_WARN("%s: recovery period over; %d clients never reconnected "
+                      "after %lds (%d clients did)\n",
+                      obd->obd_name, obd->obd_recoverable_clients,
+                      cfs_time_current_sec()- obd->obd_recovery_start,
+                      obd->obd_connected_clients);
         class_disconnect_stale_exports(obd);
         abort_recovery_queue(obd);
 
@@ -1082,8 +1089,7 @@ void target_cancel_recovery_timer(struct obd_device *obd)
         cfs_timer_disarm(&obd->obd_recovery_timer);
 }
 
-static void reset_recovery_timer(struct obd_device *obd,
-                                 struct ptlrpc_request *req)
+static void reset_recovery_timer(struct obd_device *obd, int mintime)
 {
         spin_lock_bh(&obd->obd_processing_task_lock);
         if (!obd->obd_recovering) {
@@ -1091,35 +1097,34 @@ static void reset_recovery_timer(struct obd_device *obd,
                 return;
         }
         /* Track the client's largest expected replay time */
-        if (lustre_msg_get_timeout(req->rq_reqmsg) > obd->obd_recovery_timeout)
-                obd->obd_recovery_timeout =
-                        lustre_msg_get_timeout(req->rq_reqmsg);
-        LASSERT(obd->obd_recovery_timeout >= OBD_RECOVERY_TIMEOUT);
+        obd->obd_recovery_timeout = max(mintime, obd->obd_recovery_timeout);
+        obd->obd_recovery_end = cfs_time_current_sec() +
+                obd->obd_recovery_timeout;
         cfs_timer_arm(&obd->obd_recovery_timer, 
                       cfs_time_shift(obd->obd_recovery_timeout));
         spin_unlock_bh(&obd->obd_processing_task_lock);
         CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n", 
                obd->obd_name, obd->obd_recovery_timeout);
-        obd->obd_recovery_end = cfs_time_current_sec() + 
-                obd->obd_recovery_timeout;
 }
 
 
-/* Only start it the first time called */
+/* Reset the timer with each new client connection */
 void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler,
                                  struct ptlrpc_request *req)
 {
         spin_lock_bh(&obd->obd_processing_task_lock);
-        if (obd->obd_recovery_handler) {
-                spin_unlock_bh(&obd->obd_processing_task_lock);
-                return;
-        }
+        if (obd->obd_recovery_handler)
+                goto out;
         CWARN("%s: starting recovery timer\n", obd->obd_name);
+        obd->obd_recovery_start = cfs_time_current_sec();
+        /* minimum */
+        obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
         obd->obd_recovery_handler = handler;
         cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd);
+out:
         spin_unlock_bh(&obd->obd_processing_task_lock);
-
-        reset_recovery_timer(obd, req);
+        reset_recovery_timer(obd, OBD_RECOVERY_FACTOR *
+                             lustre_msg_get_timeout(req->rq_reqmsg));
 }
 
 static int check_for_next_transno(struct obd_device *obd)
@@ -1180,9 +1185,10 @@ static void process_recovery_queue(struct obd_device *obd)
                     obd->obd_next_recovery_transno) {
                         spin_unlock_bh(&obd->obd_processing_task_lock);
                         CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is "
-                               LPD64")\n",
+                               LPD64", x"LPU64")\n",
                                obd->obd_next_recovery_transno,
-                               lustre_msg_get_transno(req->rq_reqmsg));
+                               lustre_msg_get_transno(req->rq_reqmsg),
+                               req->rq_xid);
                         l_wait_event(obd->obd_next_transno_waitq,
                                      check_for_next_transno(obd), &lwi);
                         spin_lock_bh(&obd->obd_processing_task_lock);
@@ -1201,7 +1207,8 @@ static void process_recovery_queue(struct obd_device *obd)
                 DEBUG_REQ(D_HA, req, "processing: ");
                 (void)obd->obd_recovery_handler(req);
                 obd->obd_replayed_requests++;
-                reset_recovery_timer(obd, req);
+                reset_recovery_timer(obd, 2 * /* safety */
+                        at_get(&req->rq_rqbd->rqbd_service->srv_at_estimate));
                 /* bug 1580: decide how to properly sync() in recovery */
                 //mds_fsync_super(obd->u.obt.obt_sb);
                 class_export_put(req->rq_export);
@@ -1553,9 +1560,10 @@ void target_committed_to_req(struct ptlrpc_request *req)
                 lustre_msg_set_last_committed(req->rq_repmsg,
                                               obd->obd_last_committed);
         else
-                DEBUG_REQ(D_IOCTL, req, "not sending last_committed update");
+                DEBUG_REQ(D_IOCTL, req, "not sending last_committed update (%d/"
+                          "%d)", obd->obd_no_transno, req->rq_repmsg == NULL);
 
-        CDEBUG(D_INFO, "last_committed "LPU64", xid "LPU64"\n",
+        CDEBUG(D_INFO, "last_committed x"LPU64", this req x"LPU64"\n",
                obd->obd_last_committed, req->rq_xid);
 }
 
index e55621d..5082548 100644 (file)
@@ -446,10 +446,8 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                       obd->obd_recoverable_clients, mds->mds_last_transno);
                 obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
                 obd->obd_recovering = 1;
-                obd->obd_recovery_start = CURRENT_SECONDS;
-                obd->obd_recovery_timeout = OBD_RECOVERY_TIMEOUT;
-                obd->obd_recovery_end = obd->obd_recovery_start +
-                        obd->obd_recovery_timeout;
+                obd->obd_recovery_start = 0;
+                obd->obd_recovery_end = 0;
         }
 
         mds->mds_mount_count = mount_count + 1;
index 89a7b5c..9d3d0ed 100644 (file)
@@ -958,9 +958,11 @@ static void class_disconnect_export_list(struct list_head *list, int flags)
                 fake_exp->exp_flags = flags;
                 spin_unlock(&fake_exp->exp_lock);
 
+                CDEBUG(D_HA, "%s: disconnecting export at %s (%p), "
+                       "last request at %ld\n",
+                       exp->exp_obd->obd_name, obd_export_nid2str(exp),
+                       exp, exp->exp_last_request_time);
                 rc = obd_disconnect(fake_exp);
-                CDEBUG(D_HA, "disconnected export at %s (%p): rc %d\n",
-                       obd_export_nid2str(exp), exp, rc);
                 class_export_put(exp);
         }
         EXIT;
index 53c76e5..8ed0960 100644 (file)
@@ -1486,7 +1486,6 @@ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
 
         if (lprocfs_obd_snprintf(&page, size, &len, "status: ") <= 0)
                 goto out;
-
         if (obd->obd_max_recoverable_clients == 0) {
                 lprocfs_obd_snprintf(&page, size, &len, "INACTIVE\n");
                 goto fclose;
@@ -1496,64 +1495,60 @@ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
         if (obd->obd_recovering == 0) {
                 if (lprocfs_obd_snprintf(&page, size, &len, "COMPLETE\n") <= 0)
                         goto out;
-
-                if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n",
-                    obd->obd_recovery_start) <= 0)
+                if (lprocfs_obd_snprintf(&page, size, &len,
+                                         "recovery_start: %lu\n",
+                                         obd->obd_recovery_start) <= 0)
                         goto out;
-
-                if (lprocfs_obd_snprintf(&page, size, &len, "recovery_end: %lu\n",
-                    obd->obd_recovery_end) <= 0)
+                if (lprocfs_obd_snprintf(&page, size, &len,
+                                         "recovery_duration: %lu\n",
+                                         obd->obd_recovery_end -
+                                         obd->obd_recovery_start) <= 0)
                         goto out;
-
-                /* Number of clients have have completed recovery */
-                if (lprocfs_obd_snprintf(&page, size, &len, "recovered_clients: %d\n",
-                    obd->obd_max_recoverable_clients - obd->obd_recoverable_clients) <= 0)
+                /* Number of clients that have completed recovery */
+                if (lprocfs_obd_snprintf(&page, size, &len,
+                                         "completed_clients: %d/%d\n",
+                                         obd->obd_max_recoverable_clients -
+                                         obd->obd_recoverable_clients,
+                                         obd->obd_max_recoverable_clients) <= 0)
                         goto out;
-
-                if (lprocfs_obd_snprintf(&page, size, &len, "unrecovered_clients: %d\n",
-                    obd->obd_recoverable_clients) <= 0)
+                if (lprocfs_obd_snprintf(&page, size, &len,
+                                         "replayed_requests: %d\n",
+                                         obd->obd_replayed_requests) <= 0)
                         goto out;
-
-                if (lprocfs_obd_snprintf(&page, size, &len, "last_transno: "LPD64"\n",
-                    obd->obd_next_recovery_transno - 1) <= 0)
+                if (lprocfs_obd_snprintf(&page, size, &len,
+                                         "last_transno: "LPD64"\n",
+                                         obd->obd_next_recovery_transno - 1)<=0)
                         goto out;
-
-                lprocfs_obd_snprintf(&page, size, &len, "replayed_requests: %d\n", obd->obd_replayed_requests);
                 goto fclose;
         }
 
         if (lprocfs_obd_snprintf(&page, size, &len, "RECOVERING\n") <= 0)
                 goto out;
-
         if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n",
-            obd->obd_recovery_start) <= 0)
+                                 obd->obd_recovery_start) <= 0)
                 goto out;
-
-        if (lprocfs_obd_snprintf(&page, size, &len, "time remaining: %lu\n",
-                                 CURRENT_SECONDS >= obd->obd_recovery_end ? 0 :
-                                 obd->obd_recovery_end - CURRENT_SECONDS) <= 0)
+        if (lprocfs_obd_snprintf(&page, size, &len, "time_remaining: %lu\n",
+                           cfs_time_current_sec() >= obd->obd_recovery_end ? 0 :
+                           obd->obd_recovery_end - cfs_time_current_sec()) <= 0)
                 goto out;
-
-        if(lprocfs_obd_snprintf(&page, size, &len, "connected_clients: %d/%d\n",
-                                obd->obd_connected_clients,
-                                obd->obd_max_recoverable_clients) <= 0)
+        if (lprocfs_obd_snprintf(&page, size, &len,"connected_clients: %d/%d\n",
+                                 obd->obd_connected_clients,
+                                 obd->obd_max_recoverable_clients) <= 0)
                 goto out;
-
-        /* Number of clients have have completed recovery */
-        if (lprocfs_obd_snprintf(&page, size, &len, "completed_clients: %d/%d\n",
-                                 obd->obd_max_recoverable_clients - obd->obd_recoverable_clients,
+        /* Number of clients that have completed recovery */
+        if (lprocfs_obd_snprintf(&page, size, &len,"completed_clients: %d/%d\n",
+                                 obd->obd_max_recoverable_clients -
+                                 obd->obd_recoverable_clients,
                                  obd->obd_max_recoverable_clients) <= 0)
                 goto out;
-
-        if (lprocfs_obd_snprintf(&page, size, &len, "replayed_requests: %d/??\n",
+        if (lprocfs_obd_snprintf(&page, size, &len,"replayed_requests: %d/??\n",
                                  obd->obd_replayed_requests) <= 0)
                 goto out;
-
         if (lprocfs_obd_snprintf(&page, size, &len, "queued_requests: %d\n",
                                  obd->obd_requests_queued_for_recovery) <= 0)
                 goto out;
-
-        lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n", obd->obd_next_recovery_transno);
+        lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n",
+                             obd->obd_next_recovery_transno);
 
 fclose:
         *eof = 1;
index 3ef1477..2356aa0 100644 (file)
@@ -805,10 +805,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                       le64_to_cpu(fsd->lsd_last_transno));
                 obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
                 obd->obd_recovering = 1;
-                obd->obd_recovery_start = CURRENT_SECONDS;
-                obd->obd_recovery_timeout = OBD_RECOVERY_TIMEOUT;
-                obd->obd_recovery_end = obd->obd_recovery_start +
-                        obd->obd_recovery_timeout;
+                obd->obd_recovery_start = 0;
+                obd->obd_recovery_end = 0;
         }
 
 out:
index c85f9e6..c182672 100644 (file)
@@ -1556,6 +1556,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
             imp->imp_generation == imp->imp_last_generation_checked) {
                 CDEBUG(D_RPCTRACE, "%s: skip recheck: last_committed "LPU64"\n",
                        imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
+                EXIT;
                 return;
         }
 
@@ -1990,12 +1991,9 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         ENTRY;
 
         LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY);
-
         /* Not handling automatic bulk replay yet (or ever?) */
         LASSERT(req->rq_bulk == NULL);
 
-        DEBUG_REQ(D_HA, req, "REPLAY");
-
         LASSERT (sizeof (*aa) <= sizeof (req->rq_async_args));
         aa = (struct ptlrpc_replay_async_args *)&req->rq_async_args;
         memset(aa, 0, sizeof *aa);
@@ -2011,6 +2009,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         /* Readjust the timeout for current conditions */
         ptlrpc_at_set_req_timeout(req);
 
+        DEBUG_REQ(D_HA, req, "REPLAY");
+
         atomic_inc(&req->rq_import->imp_replay_inflight);
         ptlrpc_request_addref(req); /* ptlrpcd needs a ref */
 
index 2636df8..5a9e9db 100644 (file)
@@ -338,7 +338,8 @@ static int import_select_connection(struct obd_import *imp)
                                CONNECTION_SWITCH_INC);
                 }
                 LASSERT(imp_conn->oic_last_attempt);
-                CWARN("Tried all connections, increasing latency to %ds\n",
+                CWARN("%s: tried all connections, increasing latency to %ds\n",
+                      imp->imp_obd->obd_name,
                       at_get(&imp->imp_at.iat_net_latency));
         }
 
@@ -641,6 +642,8 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                 }
 
                 if (imp->imp_invalid) {
+                        CDEBUG(D_HA, "%s: reconnected but import is invalid; "
+                               "marking evicted\n", imp->imp_obd->obd_name);
                         IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
                 } else if (MSG_CONNECT_RECOVERING & msg_flags) {
                         CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
@@ -662,7 +665,8 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                 imp->imp_last_replay_transno = 0;
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
         } else {
-                DEBUG_REQ(D_HA, request, "evicting, flags=%x", msg_flags);
+                DEBUG_REQ(D_HA, request, "evicting (not initial connect and "
+                          "flags reconnect/recovering not set: %x)",msg_flags);
                 imp->imp_remote_handle =
                                 *lustre_msg_get_handle(request->rq_repmsg);
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
index 9d88451..d6e0f8f 100644 (file)
@@ -2398,7 +2398,8 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask,
         libcfs_debug_vmsg2(data->msg_cdls, data->msg_subsys, mask, data->msg_file,
                            data->msg_fn, data->msg_line, fmt, args,
                            " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d "
-                           "e %d ref %d fl "REQ_FLAGS_FMT"/%x/%x rc %d/%d\n",
+                           "e %d to %d dl %ld ref %d fl "REQ_FLAGS_FMT"/%x/%x "
+                           "rc %d/%d\n",
                            req, req->rq_xid, req->rq_transno,
                            req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
                            req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) :
@@ -2410,7 +2411,8 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask,
                                 (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>",
                            (req->rq_import && req->rq_import->imp_client) ?
                                 req->rq_import->imp_client->cli_request_portal : -1,
-                           req->rq_reqlen, req->rq_replen, req->rq_early_count,
+                           req->rq_reqlen, req->rq_replen,
+                           req->rq_early_count, req->rq_timeout, req->rq_deadline,
                            atomic_read(&req->rq_refcount), DEBUG_REQ_FLAGS(req),
                            req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0,
                            req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0,
index 40bddf1..275ebe8 100644 (file)
@@ -997,7 +997,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
         request->rq_phase = RQ_PHASE_INTERPRET;
 
         CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc "
-               "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
+               "%s:%s+%d:%d:x"LPU64":%s:%d\n", cfs_curproc_comm(),
                (request->rq_export ?
                 (char *)request->rq_export->exp_client_uuid.uuid : "0"),
                (request->rq_export ?
@@ -1013,7 +1013,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
         request->rq_phase = RQ_PHASE_COMPLETE;
 
         CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc "
-               "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
+               "%s:%s+%d:%d:x"LPU64":%s:%d\n", cfs_curproc_comm(),
                (request->rq_export ?
                 (char *)request->rq_export->exp_client_uuid.uuid : "0"),
                (request->rq_export ?
@@ -1031,14 +1031,15 @@ put_conn:
                 class_export_put(request->rq_export);
 
         if (cfs_time_current_sec() > request->rq_deadline) {
-                DEBUG_REQ(D_WARNING, request, "Request took longer than"
-                          " estimated (%+lds); client may timeout.",
+                DEBUG_REQ(D_WARNING, request, "Request x"LPU64" took longer "
+                          "than estimated (%+lds); client may timeout.",
+                          request->rq_xid,
                           cfs_time_current_sec() - request->rq_deadline);
         }
 
         do_gettimeofday(&work_end);
         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
-        CDEBUG(D_RPCTRACE, "request "LPU64" opc %u from %s processed in "
+        CDEBUG(D_RPCTRACE, "request x"LPU64" opc %u from %s processed in "
                "%ldus (%ldus total) trans "LPU64" rc %d/%d\n",
                request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg),
                libcfs_id2str(request->rq_peer), timediff,