Whamcloud - gitweb
LU-13600 ptlrpc: limit rate of lock replays
[fs/lustre-release.git] / lustre / ptlrpc / import.c
index edb3f66..4849364 100644 (file)
@@ -466,7 +466,7 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 
 int ptlrpc_reconnect_import(struct obd_import *imp)
 {
-#ifdef ENABLE_PINGER
+#ifdef CONFIG_LUSTRE_FS_PINGER
        long timeout_jiffies = cfs_time_seconds(obd_timeout);
        int rc;
 
@@ -579,14 +579,16 @@ static int import_select_connection(struct obd_import *imp)
         */
        if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
                struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+
                if (at_get(at) < CONNECTION_SWITCH_MAX) {
                        at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
                        if (at_get(at) > CONNECTION_SWITCH_MAX)
                                at_reset(at, CONNECTION_SWITCH_MAX);
                }
                LASSERT(imp_conn->oic_last_attempt);
-               CDEBUG(D_HA, "%s: tried all connections, increasing latency "
-                      "to %ds\n", imp->imp_obd->obd_name, at_get(at));
+               CDEBUG(D_HA,
+                      "%s: tried all connections, increasing latency to %ds\n",
+                      imp->imp_obd->obd_name, at_get(at));
        }
 
        imp_conn->oic_last_attempt = ktime_get_seconds();
@@ -777,8 +779,8 @@ int ptlrpc_connect_import_locked(struct obd_import *imp)
 
        /* Report the rpc service time to the server so that it knows how long
         * to wait for clients to join recovery */
-       lustre_msg_set_service_time(request->rq_reqmsg,
-                                   at_timeout2est(request->rq_timeout));
+       lustre_msg_set_service_timeout(request->rq_reqmsg,
+                                      at_timeout2est(request->rq_timeout));
 
        /* The amount of time we give the server to process the connect req.
         * import_select_connection will increase the net latency on
@@ -814,7 +816,7 @@ int ptlrpc_connect_import_locked(struct obd_import *imp)
                lustre_msg_add_op_flags(request->rq_reqmsg,
                                        MSG_CONNECT_TRANSNO);
 
-       DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %ld)",
+       DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
                  request->rq_timeout);
        ptlrpcd_add_req(request);
        rc = 0;
@@ -1011,6 +1013,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        struct obd_import *imp = request->rq_import;
        struct lustre_handle old_hdl;
        __u64 old_connect_flags;
+       timeout_t service_timeout;
        int msg_flags;
        struct obd_connect_data *ocd;
        struct obd_export *exp = NULL;
@@ -1145,11 +1148,11 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
 
        /* The net statistics after (re-)connect is not valid anymore,
-        * because may reflect other routing, etc. */
+        * because may reflect other routing, etc.
+        */
+       service_timeout = lustre_msg_get_service_timeout(request->rq_repmsg);
        at_reinit(&imp->imp_at.iat_net_latency, 0, 0);
-       ptlrpc_at_adj_net_latency(request,
-                                 lustre_msg_get_service_time(
-                                         request->rq_repmsg));
+       ptlrpc_at_adj_net_latency(request, service_timeout);
 
        /* Import flags should be updated before waking import at FULL state */
        rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp,
@@ -1534,6 +1537,8 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 
         ENTRY;
         if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+               struct task_struct *task;
+
                 deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
                           &target_start, &target_len);
                 /* Don't care about MGC eviction */
@@ -1544,7 +1549,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                                           "using this service will fail.\n",
                                           imp->imp_obd->obd_name, target_len,
                                           target_start);
-                       LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction");
+                       LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction\n");
                 }
                 CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
                        obd2cli_tgt(imp->imp_obd),
@@ -1554,24 +1559,22 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                imp->imp_vbr_failed = 0;
                spin_unlock(&imp->imp_lock);
 
-               {
-               struct task_struct *task;
                /* bug 17802:  XXX client_disconnect_export vs connect request
                 * race. if client is evicted at this time then we start
                 * invalidate thread without reference to import and import can
                 * be freed at same time. */
                class_import_get(imp);
                task = kthread_run(ptlrpc_invalidate_import_thread, imp,
-                                    "ll_imp_inval");
+                                  "ll_imp_inval");
                if (IS_ERR(task)) {
                        class_import_put(imp);
-                       CERROR("error starting invalidate thread: %d\n", rc);
                        rc = PTR_ERR(task);
+                       CERROR("%s: can't start invalidate thread: rc = %d\n",
+                              imp->imp_obd->obd_name, rc);
                } else {
                        rc = 0;
                }
                RETURN(rc);
-               }
         }
 
        if (imp->imp_state == LUSTRE_IMP_REPLAY) {
@@ -1668,7 +1671,7 @@ static struct ptlrpc_request *ptlrpc_disconnect_prep_req(struct obd_import *imp)
 
        /* We want client umounts to happen quickly, no matter the
           server state... */
-       req->rq_timeout = min_t(int, req->rq_timeout,
+       req->rq_timeout = min_t(timeout_t, req->rq_timeout,
                                INITIAL_CONNECT_TIMEOUT);
 
        import_set_state(imp, LUSTRE_IMP_CONNECTING);
@@ -1785,7 +1788,7 @@ static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env,
                memset(&imp->imp_remote_handle, 0,
                       sizeof(imp->imp_remote_handle));
                /* take our DISCONNECT into account */
-               if (atomic_read(&imp->imp_inflight) > 1) {
+               if (atomic_read(&imp->imp_reqs) > 1) {
                        imp->imp_generation++;
                        imp->imp_initiated_at = imp->imp_generation;
                        import_set_state_nolock(imp, LUSTRE_IMP_NEW);
@@ -1863,45 +1866,48 @@ void ptlrpc_cleanup_imp(struct obd_import *imp)
 
 /* Adaptive Timeout utils */
 
-/* Update at_current with the specified value (bounded by at_min and at_max),
- * as well as the AT history "bins".
+/* Update at_current_timeout with the specified value (bounded by at_min and
+ * at_max), as well as the AT history "bins".
  *  - Bin into timeslices using AT_BINS bins.
  *  - This gives us a max of the last at_history seconds without the storage,
  *    but still smoothing out a return to normalcy from a slow response.
  *  - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
  */
-int at_measured(struct adaptive_timeout *at, unsigned int val)
+timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout)
 {
-        unsigned int old = at->at_current;
+       timeout_t old_timeout = at->at_current_timeout;
        time64_t now = ktime_get_real_seconds();
        long binlimit = max_t(long, at_history / AT_BINS, 1);
 
         LASSERT(at);
-        CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n",
-              val, at, (long)(now - at->at_binstart), at->at_current,
+       CDEBUG(D_OTHER, "add %u to %p time=%lld v=%u (%u %u %u %u)\n",
+              timeout, at, now - at->at_binstart, at->at_current_timeout,
                at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
 
-        if (val == 0)
-                /* 0's don't count, because we never want our timeout to
-                   drop to 0, and because 0 could mean an error */
+       if (timeout <= 0)
+               /* Negative timeouts and 0's don't count, because we never
+                * want our timeout to drop to 0 or below, and because 0 could
+                * mean an error
+                */
                 return 0;
 
        spin_lock(&at->at_lock);
 
         if (unlikely(at->at_binstart == 0)) {
                 /* Special case to remove default from history */
-                at->at_current = val;
-                at->at_worst_ever = val;
-                at->at_worst_time = now;
-                at->at_hist[0] = val;
+               at->at_current_timeout = timeout;
+               at->at_worst_timeout_ever = timeout;
+               at->at_worst_timestamp = now;
+               at->at_hist[0] = timeout;
                 at->at_binstart = now;
         } else if (now - at->at_binstart < binlimit ) {
                 /* in bin 0 */
-                at->at_hist[0] = max(val, at->at_hist[0]);
-                at->at_current = max(val, at->at_current);
+               at->at_hist[0] = max_t(timeout_t, timeout, at->at_hist[0]);
+               at->at_current_timeout = max_t(timeout_t, timeout,
+                                              at->at_current_timeout);
         } else {
                 int i, shift;
-                unsigned int maxv = val;
+               timeout_t maxv = timeout;
 
                /* move bins over */
                shift = (u32)(now - at->at_binstart) / binlimit;
@@ -1909,42 +1915,45 @@ int at_measured(struct adaptive_timeout *at, unsigned int val)
                 for(i = AT_BINS - 1; i >= 0; i--) {
                         if (i >= shift) {
                                 at->at_hist[i] = at->at_hist[i - shift];
-                                maxv = max(maxv, at->at_hist[i]);
+                               maxv = max_t(timeout_t, maxv, at->at_hist[i]);
                         } else {
                                 at->at_hist[i] = 0;
                         }
                 }
-                at->at_hist[0] = val;
-                at->at_current = maxv;
+               at->at_hist[0] = timeout;
+               at->at_current_timeout = maxv;
                 at->at_binstart += shift * binlimit;
         }
 
-        if (at->at_current > at->at_worst_ever) {
-                at->at_worst_ever = at->at_current;
-                at->at_worst_time = now;
-        }
+       if (at->at_current_timeout > at->at_worst_timeout_ever) {
+               at->at_worst_timeout_ever = at->at_current_timeout;
+               at->at_worst_timestamp = now;
+       }
 
-        if (at->at_flags & AT_FLG_NOHIST)
+       if (at->at_flags & AT_FLG_NOHIST)
                 /* Only keep last reported val; keeping the rest of the history
-                   for proc only */
-                at->at_current = val;
+                * for debugfs only
+                */
+               at->at_current_timeout = timeout;
 
         if (at_max > 0)
-                at->at_current =  min(at->at_current, at_max);
-        at->at_current =  max(at->at_current, at_min);
-
-        if (at->at_current != old)
-                CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d "
-                       "(val=%u) hist %u %u %u %u\n", at,
-                       old, at->at_current, at->at_current - old, val,
+               at->at_current_timeout = min_t(timeout_t,
+                                              at->at_current_timeout, at_max);
+       at->at_current_timeout = max_t(timeout_t, at->at_current_timeout,
+                                      at_min);
+       if (at->at_current_timeout != old_timeout)
+               CDEBUG(D_OTHER,
+                      "AT %p change: old=%u new=%u delta=%d (val=%d) hist %u %u %u %u\n",
+                      at, old_timeout, at->at_current_timeout,
+                      at->at_current_timeout - old_timeout, timeout,
                        at->at_hist[0], at->at_hist[1], at->at_hist[2],
                        at->at_hist[3]);
 
-        /* if we changed, report the old value */
-        old = (at->at_current != old) ? old : 0;
+       /* if we changed, report the old timeout value */
+       old_timeout = (at->at_current_timeout != old_timeout) ? old_timeout : 0;
 
        spin_unlock(&at->at_lock);
-        return old;
+       return old_timeout;
 }
 
 /* Find the imp_at index for a given portal; assign if space available */