Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / ptlrpc / import.c
index f1c3a48..e6cde5c 100644 (file)
@@ -59,6 +59,17 @@ struct ptlrpc_connect_async_args {
         int pcaa_initial_connect;
 };
 
+static void __import_set_state(struct obd_import *imp,
+                               enum lustre_imp_state state)
+{ /* set imp->imp_state and log the change in imp->imp_state_hist */
+        imp->imp_state = state;
+        imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state;
+        imp->imp_state_hist[imp->imp_state_hist_idx].ish_time =
+                cfs_time_current_sec(); /* presumably now, in seconds */
+        imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) %
+                IMP_STATE_HIST_LEN; /* next slot; wraps back to 0 */
+}
+
 /* A CLOSED import should remain so. */
 #define IMPORT_SET_STATE_NOLOCK(imp, state)                                    \
 do {                                                                           \
@@ -67,7 +78,7 @@ do {                                                                           \
                       imp, obd2cli_tgt(imp->imp_obd),                          \
                       ptlrpc_import_state_name(imp->imp_state),                \
                       ptlrpc_import_state_name(state));                        \
-               imp->imp_state = state;                                         \
+               __import_set_state(imp, state);                                 \
         }                                                                      \
 } while(0)
 
@@ -282,11 +293,16 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
                 /* Calculate max timeout for waiting on rpcs to error
                  * out. Use obd_timeout if calculated value is smaller
                  * than it. */
-                timeout = ptlrpc_inflight_timeout(imp);
-                timeout += timeout / 3;
+                if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+                        timeout = ptlrpc_inflight_timeout(imp);
+                        timeout += timeout / 3;
 
-                if (timeout == 0)
-                        timeout = obd_timeout;
+                        if (timeout == 0)
+                                timeout = obd_timeout;
+                } else {
+                        /* shorten the interval to make the race more likely */
+                        timeout = 1;
+                }
 
                 CDEBUG(D_RPCTRACE,"Sleeping %d sec for inflight to error out\n",
                        timeout);
@@ -296,7 +312,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
                  * have been locally cancelled by ptlrpc_abort_inflight. */
                 lwi = LWI_TIMEOUT_INTERVAL(
                         cfs_timeout_cap(cfs_time_seconds(timeout)),
-                        cfs_time_seconds(1), NULL, NULL);
+                        (timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2,
+                        NULL, NULL);
                 rc = l_wait_event(imp->imp_recovery_waitq,
                                 (atomic_read(&imp->imp_inflight) == 0), &lwi);
                 if (rc) {
@@ -306,31 +323,40 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
                                cli_tgt, rc, atomic_read(&imp->imp_inflight));
 
                         spin_lock(&imp->imp_lock);
-                        list_for_each_safe(tmp, n, &imp->imp_sending_list) {
-                                req = list_entry(tmp, struct ptlrpc_request,
-                                                 rq_list);
-                                DEBUG_REQ(D_ERROR, req,"still on sending list");
-                        }
-                        list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
-                                req = list_entry(tmp, struct ptlrpc_request,
-                                                 rq_list);
-                                DEBUG_REQ(D_ERROR, req,"still on delayed list");
-                        }
+                        if (atomic_read(&imp->imp_inflight) == 0) {
+                                int count = atomic_read(&imp->imp_unregistering);
 
-                        if (atomic_read(&imp->imp_unregistering) == 0) {
-                                /* We know that only "unregistering" rpcs may
-                                 * still survive in sending or delaying lists
-                                 * (They are waiting for long reply unlink in
+                                /* We know that only "unregistering" rpcs can
+                                 * survive in sending or delayed lists (they
+                                 * may be waiting for long reply unlink in
                                  * sluggish nets). Let's check this. If there
-                                 * is no unregistering and inflight != 0 this
+                                 * is no inflight and unregistering != 0, this
                                  * is bug. */
-                                LASSERT(atomic_read(&imp->imp_inflight) == 0);
+                                LASSERTF(count == 0, "Some RPCs are still "
+                                         "unregistering: %d\n", count);
 
                                 /* Let's save one loop as soon as inflight have
                                  * dropped to zero. No new inflights possible at
                                  * this point. */
                                 rc = 0;
                         } else {
+                                list_for_each_safe(tmp, n,
+                                                   &imp->imp_sending_list) {
+                                        req = list_entry(tmp,
+                                                         struct ptlrpc_request,
+                                                         rq_list);
+                                        DEBUG_REQ(D_ERROR, req,
+                                                  "still on sending list");
+                                }
+                                list_for_each_safe(tmp, n,
+                                                   &imp->imp_delayed_list) {
+                                        req = list_entry(tmp,
+                                                         struct ptlrpc_request,
+                                                         rq_list);
+                                        DEBUG_REQ(D_ERROR, req,
+                                                  "still on delayed list");
+                                }
+
                                 CERROR("%s: RPCs in \"%s\" phase found (%d). "
                                        "Network is sluggish? Waiting them "
                                        "to error out.\n", cli_tgt,
@@ -452,7 +478,7 @@ static int import_select_connection(struct obd_import *imp)
                         continue;
                 }
 
-                /* If we have not tried this connection since the
+                /* If we have not tried this connection since
                    the last successful attempt, go with this one */
                 if ((conn->oic_last_attempt == 0) ||
                     cfs_time_beforeq_64(conn->oic_last_attempt,
@@ -473,7 +499,7 @@ static int import_select_connection(struct obd_import *imp)
         }
 
         /* if not found, simply choose the current one */
-        if (!imp_conn) {
+        if (!imp_conn || imp->imp_force_reconnect) {
                 LASSERT(imp->imp_conn_current);
                 imp_conn = imp->imp_conn_current;
                 tried_all = 0;
@@ -515,10 +541,9 @@ static int import_select_connection(struct obd_import *imp)
 
         if (imp->imp_conn_current != imp_conn) {
                 if (imp->imp_conn_current)
-                        LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
-                                      imp->imp_obd->obd_name,
-                                      imp_conn->oic_uuid.uuid,
-                                      libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+                        CDEBUG(D_HA, "Changing connection for %s to %s/%s\n",
+                               imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid,
+                               libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
                 imp->imp_conn_current = imp_conn;
         }
 
@@ -741,7 +766,7 @@ static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
                 wake_pinger = 1;
         }
 #else
-        /* liblustre has no pinger thead, so we wakup pinger anyway */
+        /* liblustre has no pinger thread, so we wakeup pinger anyway */
         wake_pinger = 1;
 #endif
 
@@ -754,6 +779,12 @@ static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
         EXIT;
 }
 
+static int ptlrpc_busy_reconnect(int rc)
+{ /* returns 1 when rc is -EBUSY or -EAGAIN, otherwise 0 */
+        return (rc == -EBUSY) || (rc == -EAGAIN);
+}
+
+
 static int ptlrpc_connect_interpret(const struct lu_env *env,
                                     struct ptlrpc_request *request,
                                     void *data, int rc)
@@ -771,18 +802,22 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                 spin_unlock(&imp->imp_lock);
                 RETURN(0);
         }
-        spin_unlock(&imp->imp_lock);
 
-        if (rc)
+        if (rc) {
+                /* If this is a reconnect to a busy export, there is no need
+                 * to select a new target for connecting. */
+                imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
+                spin_unlock(&imp->imp_lock);
                 GOTO(out, rc);
+        }
 
         LASSERT(imp->imp_conn_current);
 
         msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
 
         /* All imports are pingable */
-        spin_lock(&imp->imp_lock);
         imp->imp_pingable = 1;
+        imp->imp_force_reconnect = 0;
 
         if (aa->pcaa_initial_connect) {
                 if (msg_flags & MSG_CONNECT_REPLAYABLE) {
@@ -1045,7 +1080,7 @@ finish:
 
                 /* Reset ns_connect_flags only for initial connect. It might be
                  * changed in while using FS and if we reset it in reconnect
-                 * this leads to lossing user settings done before such as
+                 * this leads to losing user settings done before such as
                  * disable lru_resize, etc. */
                 if (old_connect_flags != exp->exp_connect_flags ||
                     aa->pcaa_initial_connect) {
@@ -1191,7 +1226,8 @@ static int signal_completed_replay(struct obd_import *imp)
         req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
         lustre_msg_add_flags(req->rq_reqmsg,
                              MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
-        req->rq_timeout *= 3;
+        if (AT_OFF)
+                req->rq_timeout *= 3;
         req->rq_interpret_reply = completed_replay_interpret;
 
         ptlrpcd_add_req(req, PSCOPE_OTHER);
@@ -1205,7 +1241,7 @@ static int ptlrpc_invalidate_import_thread(void *data)
 
         ENTRY;
 
-        ptlrpc_daemonize("ll_imp_inval");
+        cfs_daemonize_ctxt("ll_imp_inval");
 
         CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
@@ -1252,7 +1288,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 #ifdef __KERNEL__
                 /* bug 17802:  XXX client_disconnect_export vs connect request
                  * race. if client will evicted at this time, we start
-                 * invalidate thread without referece to import and import can
+                 * invalidate thread without reference to import and import can
                  * be freed at same time. */
                 class_import_get(imp);
                 rc = cfs_kernel_thread(ptlrpc_invalidate_import_thread, imp,