Whamcloud - gitweb
Revert "LU-11771 ldlm: use hrtimer for recovery to fix timeout messages"
[fs/lustre-release.git] / lustre / ldlm / ldlm_lib.c
index 00e1542..5a587c7 100644 (file)
@@ -766,8 +766,9 @@ static int target_handle_reconnect(struct lustre_handle *conn,
 {
        struct obd_device *target;
        struct lustre_handle *hdl;
-       ktime_t remaining;
-       s64 timeout;
+       time64_t deadline;
+       time64_t timeout;
+       time64_t now;
        int rc = 0;
 
        ENTRY;
@@ -804,21 +805,16 @@ static int target_handle_reconnect(struct lustre_handle *conn,
                GOTO(out_already, rc);
        }
 
-       remaining = hrtimer_expires_remaining(&target->obd_recovery_timer);
-       timeout = ktime_divns(remaining, NSEC_PER_SEC);
-       if (timeout > 0) {
-               LCONSOLE_WARN("%s: Client %s (at %s) reconnected, waiting for %d clients in recovery for %lld:%.02lld\n",
-                             target->obd_name,
-                             obd_uuid2str(&exp->exp_client_uuid),
-                             obd_export_nid2str(exp),
-                             target->obd_max_recoverable_clients,
-                             timeout / 60, timeout % 60);
-       } else {
+       now = ktime_get_seconds();
+       deadline = jiffies_to_msecs(target->obd_recovery_timer.expires) /
+                  MSEC_PER_SEC;
+       if (now < deadline) {
                struct target_distribute_txn_data *tdtd;
                int size = 0;
                int count = 0;
                char *buf = NULL;
 
+               timeout = deadline - now;
                tdtd = class_exp2tgt(exp)->lut_tdtd;
                if (tdtd && tdtd->tdtd_show_update_logs_retrievers)
                        buf = tdtd->tdtd_show_update_logs_retrievers(
@@ -826,21 +822,30 @@ static int target_handle_reconnect(struct lustre_handle *conn,
                                &size, &count);
 
                if (count > 0)
-                       LCONSOLE_WARN("%s: Client %s (at %s) reconnecting, waiting for %d MDTs (%s) in recovery for %lld:%.02lld. Please wait until all MDTs recovered or you may force MDT evicition via 'lctl --device %s abort_recovery.\n",
-                                     target->obd_name,
-                                     obd_uuid2str(&exp->exp_client_uuid),
-                                     obd_export_nid2str(exp), count,
-                                     buf ? buf : "unknown (not enough RAM)",
-                                     (abs(timeout) + target->obd_recovery_timeout) / 60,
-                                     (abs(timeout) + target->obd_recovery_timeout) % 60,
-                                     target->obd_name);
+                       LCONSOLE_WARN("%s: Recovery already passed deadline "
+                                     "%lld:%.02lld. It is due to DNE recovery "
+                                     "failed/stuck on the %d MDT(s):%s. "
+                                     "Please wait until all MDTs recovered "
+                                     "or abort the recovery by force.\n",
+                                     target->obd_name, timeout / 60,
+                                     timeout % 60, count,
+                                     buf ? buf : "unknown (not enough RAM)");
                else
-                       LCONSOLE_WARN("%s: Recovery already passed deadline %lld:%.02lld. If you do not want to wait more, you may force taget eviction via 'lctl --device %s abort_recovery.\n",
-                                     target->obd_name, abs(timeout) / 60,
-                                     abs(timeout) % 60, target->obd_name);
+                       LCONSOLE_WARN("%s: Recovery already passed deadline "
+                                     "%lld:%.02lld. If you do not want to wait "
+                                     "more, please abort the recovery by "
+                                     "force.\n", target->obd_name,
+                                     timeout / 60, timeout % 60);
 
                if (buf != NULL)
                        OBD_FREE(buf, size);
+       } else {
+               timeout = now - deadline;
+               LCONSOLE_WARN("%s: Recovery already passed deadline"
+                       " %lld:%.02lld, It is most likely due to DNE"
+                       " recovery is failed or stuck, please wait a"
+                       " few more minutes or abort the recovery.\n",
+                       target->obd_name, timeout / 60, timeout % 60);
        }
 
 out_already:
@@ -1261,34 +1266,36 @@ no_export:
                /* allow "new" MDT to be connected during recovery, since we
                 * need retrieve recovery update records from it */
                if (target->obd_recovering && !lw_client && !mds_mds_conn) {
-                       struct hrtimer *timer = &target->obd_recovery_timer;
-                       ktime_t remaining;
-                       s64 timeout, left;
-                       int in_progress;
-                       int connected;
-                       int known;
-                       int stale;
+                       time64_t now;
+                       time64_t t;
                        char *msg;
-
-                       connected = atomic_read(&target->obd_connected_clients);
-                       in_progress = atomic_read(&target->obd_lock_replay_clients);
-                       known = target->obd_max_recoverable_clients;
-                       stale = target->obd_stale_clients;
-                       remaining = hrtimer_expires_remaining(timer);
-                       left = ktime_divns(remaining, NSEC_PER_SEC);
-                       if (ktime_to_ns(remaining) > 0) {
-                               msg = "to recover in";
-                               timeout = left;
-                       } else {
+                       int c; /* connected */
+                       int i; /* in progress */
+                       int k; /* known */
+                       int s; /* stale/evicted */
+
+                       c = atomic_read(&target->obd_connected_clients);
+                       i = atomic_read(&target->obd_lock_replay_clients);
+                       k = target->obd_max_recoverable_clients;
+                       s = target->obd_stale_clients;
+                       t = jiffies_to_msecs(target->obd_recovery_timer.expires);
+                       t /= MSEC_PER_SEC;
+                       now = ktime_get_seconds();
+                       if (now > t) {
+                               t = now - t;
                                msg = "already passed deadline";
-                               timeout = -left;
+                       } else {
+                               t -= now;
+                               msg = "to recover in";
                        }
 
-                       LCONSOLE_WARN("%s: Denying connection for new client %s (at %s), waiting for %d known clients (%d recovered, %d in progress, and %d evicted) %s %lld:%.02lld\n",
+                       LCONSOLE_WARN("%s: Denying connection for new client %s"
+                                     "(at %s), waiting for %d known clients "
+                                     "(%d recovered, %d in progress, and %d "
+                                     "evicted) %s %lld:%.02lld\n",
                                      target->obd_name, cluuid.uuid,
-                                     libcfs_nid2str(req->rq_peer.nid), known,
-                                     connected - in_progress, in_progress,
-                                     stale, msg, timeout / 60, timeout % 60);
+                                     libcfs_nid2str(req->rq_peer.nid), k,
+                                     c - i, i, s, msg, t / 60, t % 60);
                        rc = -EBUSY;
                } else {
 dont_check_exports:
@@ -1705,14 +1712,12 @@ EXPORT_SYMBOL(target_cleanup_recovery);
 /* obd_recovery_task_lock should be held */
 void target_cancel_recovery_timer(struct obd_device *obd)
 {
-       CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
-       hrtimer_cancel(&obd->obd_recovery_timer);
+        CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
+       del_timer(&obd->obd_recovery_timer);
 }
 
 static void target_start_recovery_timer(struct obd_device *obd)
 {
-       ktime_t delay;
-
        if (obd->obd_recovery_start != 0)
                return;
 
@@ -1729,8 +1734,8 @@ static void target_start_recovery_timer(struct obd_device *obd)
                return;
        }
 
-       delay = ktime_set(obd->obd_recovery_timeout, 0);
-       hrtimer_start(&obd->obd_recovery_timer, delay, HRTIMER_MODE_REL);
+       mod_timer(&obd->obd_recovery_timer,
+                 jiffies + cfs_time_seconds(obd->obd_recovery_timeout));
        obd->obd_recovery_start = ktime_get_real_seconds();
        spin_unlock(&obd->obd_dev_lock);
 
@@ -1749,12 +1754,13 @@ static void target_start_recovery_timer(struct obd_device *obd)
  * if @extend is true, extend recovery window to have @drt remaining at least;
  * otherwise, make sure the recovery timeout value is not less than @drt.
  */
-static void extend_recovery_timer(struct obd_device *obd, time_t drt,
+static void extend_recovery_timer(struct obd_device *obd, time64_t drt,
                                  bool extend)
 {
-       ktime_t left_ns;
-       time_t left;
-       time_t to;
+       time64_t now;
+       time64_t end;
+       time64_t left;
+       time64_t to;
 
        spin_lock(&obd->obd_dev_lock);
        if (!obd->obd_recovering || obd->obd_abort_recovery) {
@@ -1763,34 +1769,33 @@ static void extend_recovery_timer(struct obd_device *obd, time_t drt,
        }
        LASSERT(obd->obd_recovery_start != 0);
 
+       now = ktime_get_real_seconds();
        to = obd->obd_recovery_timeout;
-       left_ns = hrtimer_expires_remaining(&obd->obd_recovery_timer);
-       left = ktime_divns(left_ns, NSEC_PER_SEC);
-       if (extend && (drt > left))
-               to += drt - left;
-       else if (!extend && (drt > to))
-               to = drt;
+       end = obd->obd_recovery_start + to;
+       left = end - now;
+
+        if (extend && (drt > left)) {
+                to += drt - left;
+        } else if (!extend && (drt > to)) {
+                to = drt;
+        }
 
        if (to > obd->obd_recovery_time_hard) {
                to = obd->obd_recovery_time_hard;
-               CWARN("%s: extended recovery timer reaching hard limit: %ld, extend: %d\n",
+               CWARN("%s: extended recovery timer reaching hard limit: %lld, extend: %d\n",
                      obd->obd_name, to, extend);
        }
 
        if (obd->obd_recovery_timeout < to) {
-               ktime_t now = ktime_get_real();
-               ktime_t end;
-
-               obd->obd_recovery_timeout = to;
-               end = ktime_set(obd->obd_recovery_start + to, 0);
-               left_ns = ktime_sub(end, now);
-               hrtimer_forward_now(&obd->obd_recovery_timer, left_ns);
-               left = ktime_divns(left_ns, NSEC_PER_MSEC);
-       }
+                obd->obd_recovery_timeout = to;
+               end = obd->obd_recovery_start + to;
+               mod_timer(&obd->obd_recovery_timer,
+                         jiffies + cfs_time_seconds(end - now));
+        }
        spin_unlock(&obd->obd_dev_lock);
 
-       CDEBUG(D_HA, "%s: recovery timer will expire in %ld seconds\n",
-               obd->obd_name, left);
+       CDEBUG(D_HA, "%s: recovery timer will expire in %lld seconds\n",
+               obd->obd_name, (s64)(end - now));
 }
 
 /* Reset the timer with each new client connection */
@@ -1803,22 +1808,22 @@ static void extend_recovery_timer(struct obd_device *obd, time_t drt,
  * be extended to make sure the client could be reconnected, in the
  * process, the timeout from the new client should be ignored.
  */
+
 static void
 check_and_start_recovery_timer(struct obd_device *obd,
-                              struct ptlrpc_request *req,
-                              int new_client)
+                               struct ptlrpc_request *req,
+                               int new_client)
 {
-       time_t service_time = lustre_msg_get_service_time(req->rq_reqmsg);
-       struct obd_device_target *obt = &obd->u.obt;
+       time64_t service_time = lustre_msg_get_service_time(req->rq_reqmsg);
+        struct obd_device_target *obt = &obd->u.obt;
 
-       if (!new_client && service_time)
-               /* Teach server about old server's estimates, as first guess
-                * at how long new requests will take.
-                */
+        if (!new_client && service_time)
+                /* Teach server about old server's estimates, as first guess
+                 * at how long new requests will take. */
                at_measured(&req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
-                           service_time);
+                            service_time);
 
-       target_start_recovery_timer(obd);
+        target_start_recovery_timer(obd);
 
        /* Convert the service time to RPC timeout,
         * and reuse service_time to limit stack usage.
@@ -2174,15 +2179,15 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
         (void)handler(req);
         lu_context_exit(&thread->t_env->le_ctx);
 
-       /* don't reset timer for final stage */
-       if (!exp_finished(req->rq_export)) {
-               time_t to = obd_timeout;
+        /* don't reset timer for final stage */
+        if (!exp_finished(req->rq_export)) {
+               time64_t to = obd_timeout;
 
-               /**
-                * Add request timeout to the recovery time so next request from
-                * this client may come in recovery time
-                */
-               if (!AT_OFF) {
+                /**
+                 * Add request timeout to the recovery time so next request from
+                 * this client may come in recovery time
+                 */
+                if (!AT_OFF) {
                        struct ptlrpc_service_part *svcpt;
 
                        svcpt = req->rq_rqbd->rqbd_svcpt;
@@ -2190,10 +2195,10 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
                         * the client will recalculate the timeout according to
                         * current server estimate service time, so we will
                         * use the maxium timeout here for waiting the client
-                        * sending the next req
-                        */
-                       to = max_t(time_t, at_est2timeout(at_get(&svcpt->scp_at_estimate)),
-                                  lustre_msg_get_timeout(req->rq_reqmsg));
+                        * sending the next req */
+                       to = max((int)at_est2timeout(
+                                at_get(&svcpt->scp_at_estimate)),
+                                (int)lustre_msg_get_timeout(req->rq_reqmsg));
                        /* Add 2 net_latency, one for balance rq_deadline
                         * (see ptl_send_rpc), one for resend the req to server,
                         * Note: client will pack net_latency in replay req
@@ -2629,20 +2634,17 @@ void target_recovery_fini(struct obd_device *obd)
 }
 EXPORT_SYMBOL(target_recovery_fini);
 
-static enum hrtimer_restart target_recovery_expired(struct hrtimer *timer)
+static void target_recovery_expired(cfs_timer_cb_arg_t data)
 {
-       struct obd_device *obd = container_of(timer, struct obd_device,
-                                             obd_recovery_timer);
-
-       CDEBUG(D_HA,
-              "%s: recovery timed out; %d clients are still in recovery after %llu seconds (%d clients connected)\n",
+       struct obd_device *obd = cfs_from_timer(obd, data, obd_recovery_timer);
+       CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery"
+              " after %llus (%d clients connected)\n",
               obd->obd_name, atomic_read(&obd->obd_lock_replay_clients),
-              ktime_get_real_seconds() - obd->obd_recovery_start,
+              (s64)(ktime_get_real_seconds() - obd->obd_recovery_start),
               atomic_read(&obd->obd_connected_clients));
 
        obd->obd_recovery_expired = 1;
        wake_up(&obd->obd_next_transno_waitq);
-       return HRTIMER_NORESTART;
 }
 
 void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
@@ -2661,14 +2663,13 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
        CDEBUG(D_HA, "RECOVERY: service %s, %d recoverable clients, "
               "last_transno %llu\n", obd->obd_name,
               obd->obd_max_recoverable_clients, obd->obd_last_committed);
-       LASSERT(obd->obd_stopping == 0);
-       obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
-       obd->obd_recovery_start = 0;
-       obd->obd_recovery_end = 0;
-
-       hrtimer_init(&obd->obd_recovery_timer, CLOCK_REALTIME,
-                    HRTIMER_MODE_REL);
-       obd->obd_recovery_timer.function = &target_recovery_expired;
+        LASSERT(obd->obd_stopping == 0);
+        obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
+        obd->obd_recovery_start = 0;
+        obd->obd_recovery_end = 0;
+
+       cfs_timer_setup(&obd->obd_recovery_timer, target_recovery_expired,
+                       (unsigned long)obd, 0);
        target_start_recovery_thread(lut, handler);
 }
 EXPORT_SYMBOL(target_recovery_init);