Whamcloud - gitweb
LU-9019 obd: use 64-bit time for obd_recovery_* fields 04/25604/7
author James Simmons <uja.ornl@yahoo.com>
Thu, 23 Mar 2017 13:56:52 +0000 (09:56 -0400)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 30 Mar 2017 03:54:19 +0000 (03:54 +0000)
The obd_recovery_* fields store absolute times in 32-bit
time_t quantities, which will overflow in 2038. This
change switches them to time64_t.

Change-Id: I59e41bc876eeeb563921abb764cafc7e40781aad
Signed-off-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-on: https://review.whamcloud.com/25604
Tested-by: Jenkins
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/obdclass/lprocfs_status_server.c
lustre/obdclass/obd_mount_server.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/service.c

index af87bfb..77c61b3 100644 (file)
@@ -650,11 +650,11 @@ struct obd_device {
        /* protected by obd_recovery_task_lock */
        struct timer_list       obd_recovery_timer;
        /* seconds */
-       time_t                  obd_recovery_start;
+       time64_t                obd_recovery_start;
        /* seconds, for lprocfs_status */
-       time_t                  obd_recovery_end;
-       int                     obd_recovery_time_hard;
-       int                     obd_recovery_timeout;
+       time64_t                obd_recovery_end;
+       time64_t                obd_recovery_time_hard;
+       time64_t                obd_recovery_timeout;
        int                     obd_recovery_ir_factor;
 
        /* new recovery stuff from CMD2 */
index 65a8698..3677da1 100644 (file)
@@ -1568,11 +1568,13 @@ static void target_finish_recovery(struct lu_target *lut)
 
        /* Only log a recovery message when recovery has occurred. */
        if (obd->obd_recovery_start) {
-               time_t elapsed_time = max_t(time_t, 1, cfs_time_current_sec() -
-                                       obd->obd_recovery_start);
-               LCONSOLE_INFO("%s: Recovery over after %d:%.02d, of %d clients "
+               time64_t now = ktime_get_real_seconds();
+               time64_t elapsed_time;
+
+               elapsed_time = max_t(time64_t, now - obd->obd_recovery_start, 1);
+               LCONSOLE_INFO("%s: Recovery over after %lld:%.02lld, of %d clients "
                        "%d recovered and %d %s evicted.\n", obd->obd_name,
-                       (int)elapsed_time / 60, (int)elapsed_time % 60,
+                       (s64)elapsed_time / 60, (s64)elapsed_time % 60,
                        obd->obd_max_recoverable_clients,
                        atomic_read(&obd->obd_connected_clients),
                        obd->obd_stale_clients,
@@ -1596,7 +1598,7 @@ static void target_finish_recovery(struct lu_target *lut)
        }
        spin_unlock(&obd->obd_recovery_task_lock);
 
-        obd->obd_recovery_end = cfs_time_current_sec();
+       obd->obd_recovery_end = ktime_get_real_seconds();
 
        /* When recovery finished, cleanup orphans on MDS and OST. */
         if (OBT(obd) && OBP(obd, postrecov)) {
@@ -1725,17 +1727,16 @@ static void target_start_recovery_timer(struct obd_device *obd)
 
        mod_timer(&obd->obd_recovery_timer,
                  cfs_time_shift(obd->obd_recovery_timeout));
-       obd->obd_recovery_start = cfs_time_current_sec();
+       obd->obd_recovery_start = ktime_get_real_seconds();
        spin_unlock(&obd->obd_dev_lock);
 
-        LCONSOLE_WARN("%s: Will be in recovery for at least %d:%.02d, "
-                      "or until %d client%s reconnect%s\n",
-                      obd->obd_name,
-                      obd->obd_recovery_timeout / 60,
-                      obd->obd_recovery_timeout % 60,
-                      obd->obd_max_recoverable_clients,
-                      (obd->obd_max_recoverable_clients == 1) ? "" : "s",
-                      (obd->obd_max_recoverable_clients == 1) ? "s": "");
+       LCONSOLE_WARN("%s: Will be in recovery for at least %llu:%02llu, or until %d client%s reconnect%s\n",
+                     obd->obd_name,
+                     obd->obd_recovery_timeout / 60,
+                     obd->obd_recovery_timeout % 60,
+                     obd->obd_max_recoverable_clients,
+                     (obd->obd_max_recoverable_clients == 1) ? "" : "s",
+                     (obd->obd_max_recoverable_clients == 1) ? "s": "");
 }
 
 /**
@@ -1744,24 +1745,25 @@ static void target_start_recovery_timer(struct obd_device *obd)
  * if @extend is true, extend recovery window to have @drt remaining at least;
  * otherwise, make sure the recovery timeout value is not less than @drt.
  */
-static void extend_recovery_timer(struct obd_device *obd, int drt, bool extend)
+static void extend_recovery_timer(struct obd_device *obd, int drt,
+                                 bool extend)
 {
-       cfs_time_t now;
-       cfs_time_t end;
-       cfs_duration_t left;
-       int to;
+       time64_t now;
+       time64_t end;
+       time64_t left;
+       time64_t to;
 
        spin_lock(&obd->obd_dev_lock);
        if (!obd->obd_recovering || obd->obd_abort_recovery) {
                spin_unlock(&obd->obd_dev_lock);
-                return;
-        }
-        LASSERT(obd->obd_recovery_start != 0);
+               return;
+       }
+       LASSERT(obd->obd_recovery_start != 0);
 
-        now  = cfs_time_current_sec();
-        to   = obd->obd_recovery_timeout;
-        end  = obd->obd_recovery_start + to;
-        left = cfs_time_sub(end, now);
+       now = ktime_get_real_seconds();
+       to = obd->obd_recovery_timeout;
+       end = obd->obd_recovery_start + to;
+       left = end - now;
 
         if (extend && (drt > left)) {
                 to += drt - left;
@@ -1771,8 +1773,7 @@ static void extend_recovery_timer(struct obd_device *obd, int drt, bool extend)
 
        if (to > obd->obd_recovery_time_hard) {
                to = obd->obd_recovery_time_hard;
-               CWARN("%s: extended recovery timer reaching hard "
-                     "limit: %d, extend: %d\n",
+               CWARN("%s: extended recovery timer reaching hard limit: %lld, extend: %d\n",
                      obd->obd_name, to, extend);
        }
 
@@ -1784,8 +1785,8 @@ static void extend_recovery_timer(struct obd_device *obd, int drt, bool extend)
         }
        spin_unlock(&obd->obd_dev_lock);
 
-       CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n",
-               obd->obd_name, (unsigned)cfs_time_sub(end, now));
+       CDEBUG(D_HA, "%s: recovery timer will expire in %lld seconds\n",
+               obd->obd_name, (s64)(end - now));
 }
 
 /* Reset the timer with each new client connection */
@@ -1989,8 +1990,8 @@ static int target_recovery_overseer(struct lu_target *lut,
        struct obd_device       *obd = lut->lut_obd;
        struct target_distribute_txn_data *tdtd;
 repeat:
-       if ((obd->obd_recovery_start != 0) && (cfs_time_current_sec() >=
-             (obd->obd_recovery_start + obd->obd_recovery_time_hard))) {
+       if (obd->obd_recovery_start != 0 && ktime_get_real_seconds() >=
+             (obd->obd_recovery_start + obd->obd_recovery_time_hard)) {
                __u64 next_update_transno = 0;
 
                /* Only abort the recovery if there are no update recovery
@@ -2213,9 +2214,8 @@ static int check_for_recovery_ready(struct lu_target *lut)
                         * timer expired, and some clients got evicted */
                        extend_recovery_timer(obd, obd->obd_recovery_timeout,
                                              true);
-                       CDEBUG(D_HA, "%s update recovery is not ready,"
-                              " extend recovery %d\n", obd->obd_name,
-                              obd->obd_recovery_timeout);
+                       CDEBUG(D_HA, "%s update recovery is not ready, extend recovery %llu\n",
+                              obd->obd_name, obd->obd_recovery_timeout);
                        return 0;
                }
        }
@@ -2618,9 +2618,9 @@ static void target_recovery_expired(unsigned long castmeharder)
 {
        struct obd_device *obd = (struct obd_device *)castmeharder;
        CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery"
-              " after %lds (%d clients connected)\n",
+              " after %llus (%d clients connected)\n",
               obd->obd_name, atomic_read(&obd->obd_lock_replay_clients),
-              cfs_time_current_sec()- obd->obd_recovery_start,
+              (s64)(ktime_get_real_seconds() - obd->obd_recovery_start),
               atomic_read(&obd->obd_connected_clients));
 
        obd->obd_recovery_expired = 1;
index defd672..fc74d16 100644 (file)
@@ -577,11 +577,12 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
        /* sampled unlocked, but really... */
        if (obd->obd_recovering == 0) {
                seq_printf(m, "COMPLETE\n");
-               seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
-               seq_printf(m, "recovery_duration: %lu\n",
+               seq_printf(m, "recovery_start: %lld\n",
+                          (s64)obd->obd_recovery_start);
+               seq_printf(m, "recovery_duration: %lld\n",
                           obd->obd_recovery_end ?
                           obd->obd_recovery_end - obd->obd_recovery_start :
-                          cfs_time_current_sec() - obd->obd_recovery_start);
+                          ktime_get_real_seconds() - obd->obd_recovery_start);
                /* Number of clients that have completed recovery */
                seq_printf(m, "completed_clients: %d/%d\n",
                           obd->obd_max_recoverable_clients -
@@ -611,11 +612,11 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                        seq_printf(m, "WAITING\n");
                        seq_printf(m, "non-ready MDTs: %s\n",
                                   buf ? buf : "unknown (not enough RAM)");
-                       seq_printf(m, "recovery_start: %lu\n",
-                                  obd->obd_recovery_start);
-                       seq_printf(m, "time_waited: %lu\n",
-                                  cfs_time_current_sec() -
-                                  obd->obd_recovery_start);
+                       seq_printf(m, "recovery_start: %lld\n",
+                                  (s64)obd->obd_recovery_start);
+                       seq_printf(m, "time_waited: %lld\n",
+                                  (s64)(ktime_get_real_seconds() -
+                                        obd->obd_recovery_start));
                }
 
                if (buf != NULL)
@@ -626,14 +627,14 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
        }
 
        seq_printf(m, "RECOVERING\n");
-       seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
-       seq_printf(m, "time_remaining: %lu\n",
-                  cfs_time_current_sec() >=
+       seq_printf(m, "recovery_start: %lld\n", (s64)obd->obd_recovery_start);
+       seq_printf(m, "time_remaining: %lld\n",
+                  ktime_get_real_seconds() >=
                   obd->obd_recovery_start +
                   obd->obd_recovery_timeout ? 0 :
-                  obd->obd_recovery_start +
-                  obd->obd_recovery_timeout -
-                  cfs_time_current_sec());
+                  (s64)(obd->obd_recovery_start +
+                        obd->obd_recovery_timeout -
+                        ktime_get_real_seconds()));
        seq_printf(m, "connected_clients: %d/%d\n",
                   atomic_read(&obd->obd_connected_clients),
                   obd->obd_max_recoverable_clients);
@@ -693,7 +694,7 @@ int lprocfs_recovery_time_soft_seq_show(struct seq_file *m, void *data)
        struct obd_device *obd = m->private;
 
        LASSERT(obd != NULL);
-       seq_printf(m, "%d\n", obd->obd_recovery_timeout);
+       seq_printf(m, "%llu\n", obd->obd_recovery_timeout);
        return 0;
 }
 EXPORT_SYMBOL(lprocfs_recovery_time_soft_seq_show);
@@ -725,7 +726,7 @@ int lprocfs_recovery_time_hard_seq_show(struct seq_file *m, void *data)
        struct obd_device *obd = m->private;
 
        LASSERT(obd != NULL);
-       seq_printf(m, "%u\n", obd->obd_recovery_time_hard);
+       seq_printf(m, "%lld\n", obd->obd_recovery_time_hard);
        return 0;
 }
 EXPORT_SYMBOL(lprocfs_recovery_time_hard_seq_show);
index 8cbdbbd..3c645c9 100644 (file)
@@ -1917,7 +1917,8 @@ void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
        }
 
        /* we're done */
-       obd->obd_recovery_timeout   = max(obd->obd_recovery_timeout, soft);
+       obd->obd_recovery_timeout = max_t(time64_t, obd->obd_recovery_timeout,
+                                         soft);
        obd->obd_recovery_time_hard = hard;
        obd->obd_recovery_ir_factor = factor;
 }
index b5fbd8e..e80d5b0 100644 (file)
@@ -511,27 +511,30 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
         }
         /* Report actual service time for client latency calc */
         lustre_msg_set_service_time(req->rq_repmsg, service_time);
-        /* Report service time estimate for future client reqs, but report 0
+       /* Report service time estimate for future client reqs, but report 0
         * (to be ignored by client) if it's an error reply during recovery.
-         * (bz15815) */
-        if (req->rq_type == PTL_RPC_MSG_ERR &&
+        * b=15815
+        */
+       if (req->rq_type == PTL_RPC_MSG_ERR &&
            (req->rq_export == NULL ||
             req->rq_export->exp_obd->obd_recovering)) {
-                lustre_msg_set_timeout(req->rq_repmsg, 0);
+               lustre_msg_set_timeout(req->rq_repmsg, 0);
        } else {
-               __u32 timeout;
+               time64_t timeout;
 
                if (req->rq_export && req->rq_reqmsg != NULL &&
                    (flags & PTLRPC_REPLY_EARLY) &&
                    lustre_msg_get_flags(req->rq_reqmsg) &
-                   (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))
-                       timeout = cfs_time_current_sec() -
-                               req->rq_arrival_time.tv_sec +
-                               min(at_extra,
-                                   req->rq_export->exp_obd->
-                                   obd_recovery_timeout / 4);
-               else
+                   (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+                       struct obd_device *exp_obd = req->rq_export->exp_obd;
+
+                       timeout = ktime_get_real_seconds() -
+                                 req->rq_arrival_time.tv_sec +
+                                 min_t(time64_t, at_extra,
+                                       exp_obd->obd_recovery_timeout / 4);
+               } else {
                        timeout = at_get(&svcpt->scp_at_estimate);
+               }
                lustre_msg_set_timeout(req->rq_repmsg, timeout);
        }
 
index b0921ff..a23ffe4 100644 (file)
@@ -1300,17 +1300,22 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
        if (req->rq_export &&
            lustre_msg_get_flags(req->rq_reqmsg) &
            (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+               struct obd_device *obd_exp = req->rq_export->exp_obd;
+
                /* During recovery, we don't want to send too many early
                 * replies, but on the other hand we want to make sure the
                 * client has enough time to resend if the rpc is lost. So
                 * during the recovery period send at least 4 early replies,
                 * spacing them every at_extra if we can. at_estimate should
-                * always equal this fixed value during recovery. */
+                * always equal this fixed value during recovery.
+                */
                /* Don't account request processing time into AT history
                 * during recovery, it is not service time we need but
-                * includes also waiting time for recovering clients */
-               newdl = cfs_time_current_sec() + min(at_extra,
-                       req->rq_export->exp_obd->obd_recovery_timeout / 4);
+                * includes also waiting time for recovering clients
+                */
+               newdl = min_t(time64_t, at_extra,
+                             obd_exp->obd_recovery_timeout / 4) +
+                       ktime_get_real_seconds();
        } else {
                /* We want to extend the request deadline by at_extra seconds,
                 * so we set our service estimate to reflect how much time has