Whamcloud - gitweb
LU-13344 ptlrpc: change request timeouts to s32 33/37933/6
author James Simmons <jsimmons@infradead.org>
Fri, 20 Mar 2020 21:40:25 +0000 (17:40 -0400)
committer Oleg Drokin <green@whamcloud.com>
Tue, 31 Mar 2020 07:01:11 +0000 (07:01 +0000)
In the latest kernels time_t has been removed, since time_t has
been a 64-bit value just like time64_t, so there is no need for it
anymore. This means all kernel timekeeping is 64 bits, but Lustre
sends 32-bit values, in seconds, for its timeouts between nodes.
To keep backwards compatibility, as well as to distinguish timeouts
from timestamps, change the Lustre fields representing timeouts
to a new typedef, timeout_t, which is an s32. The reason for picking
a signed number is that it makes it easier to detect if an
incorrect timeout was calculated. In the past negative timeouts
have happened, which would look like an enormous timeout if
treated as an unsigned number.

Change-Id: I7491bbac65023c544e8f4a0488b1d07315a739e5
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/37933
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
12 files changed:
contrib/scripts/spelling.txt
libcfs/include/libcfs/libcfs.h
lustre/include/lustre_import.h
lustre/include/lustre_net.h
lustre/include/lustre_osc.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_internal.h

index 9a96834..008a87a 100644 (file)
@@ -168,5 +168,6 @@ strcat||strncat
 strcpy||strncpy
 struct timeval||struct timespec64
 tempnam||mkstemp
+time_t||timeout_t
 timer_setup||cfs_timer_setup
 wait_queue_t||wait_queue_entry_t
index b60343b..80dd9d2 100644 (file)
@@ -92,6 +92,8 @@ do {                                                                  \
 } while (0)
 #endif
 
+typedef s32 timeout_t;
+
 /* need both kernel and user-land acceptor */
 #define LNET_ACCEPTOR_MIN_RESERVED_PORT    512
 #define LNET_ACCEPTOR_MAX_RESERVED_PORT    1023
index 0dae5e7..aa2e9a3 100644 (file)
@@ -341,11 +341,11 @@ static inline unsigned int at_est2timeout(unsigned int val)
         return (val + (val >> 2) + 5);
 }
 
-static inline unsigned int at_timeout2est(unsigned int val)
+static inline timeout_t at_timeout2est(timeout_t timeout)
 {
-        /* restore estimate value from timeout: e=4/5(t-5) */
-        LASSERT(val);
-        return (max((val << 2) / 5, 5U) - 4);
+       /* restore estimate value from timeout: e=4/5(t-5) */
+       LASSERT(timeout > 0);
+       return max((timeout << 2) / 5, 5) - 4;
 }
 
 static inline void at_reset_nolock(struct adaptive_timeout *at, int val)
index 4f49298..a7c196a 100644 (file)
@@ -1119,8 +1119,16 @@ struct ptlrpc_request {
         * service time estimate (secs)
         * If the request is not served by this time, it is marked as timed out.
         * Do not change to time64_t since this is transmitted over the wire.
+        *
+        * The linux kernel handles timestamps with time64_t and timeouts
+        * are normally done with jiffies. Lustre shares the rq_timeout between
+        * nodes. Since jiffies can vary from node to node Lustre instead
+        * will express the timeout value in seconds. To avoid confusion with
+        * timestamps (time64_t) and jiffy timeouts (long) Lustre timeouts
+        * are expressed in s32 (timeout_t). Also what is transmitted over
+        * the wire is 32 bits.
         */
-       time_t                           rq_timeout;
+       timeout_t                        rq_timeout;
        /**
         * when request/reply sent (secs), or time when request should be sent
         */
@@ -2323,8 +2331,8 @@ void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit);
 int lustre_msg_get_status(struct lustre_msg *msg);
 __u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg);
 __u32 lustre_msg_get_magic(struct lustre_msg *msg);
-__u32 lustre_msg_get_timeout(struct lustre_msg *msg);
-__u32 lustre_msg_get_service_time(struct lustre_msg *msg);
+timeout_t lustre_msg_get_timeout(struct lustre_msg *msg);
+timeout_t lustre_msg_get_service_timeout(struct lustre_msg *msg);
 char *lustre_msg_get_jobid(struct lustre_msg *msg);
 __u32 lustre_msg_get_cksum(struct lustre_msg *msg);
 __u64 lustre_msg_get_mbits(struct lustre_msg *msg);
@@ -2341,8 +2349,9 @@ void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
 void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
 void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *sizes);
 void ptlrpc_request_set_replen(struct ptlrpc_request *req);
-void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
-void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
+void lustre_msg_set_timeout(struct lustre_msg *msg, timeout_t timeout);
+void lustre_msg_set_service_timeout(struct lustre_msg *msg,
+                                   timeout_t service_timeout);
 void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
 void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
 void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits);
index 9a0fd6a..6601a19 100644 (file)
@@ -533,7 +533,7 @@ struct osc_brw_async_args {
        int                      aa_requested_nob;
        int                      aa_nio_count;
        u32                      aa_page_count;
-       int                      aa_resends;
+       s32                      aa_resends;
        struct brw_page         **aa_ppga;
        struct client_obd       *aa_cli;
        struct list_head         aa_oaps;
index 6d14580..038da9e 100644 (file)
@@ -1892,40 +1892,40 @@ check_and_start_recovery_timer(struct obd_device *obd,
                               struct ptlrpc_request *req,
                               int new_client)
 {
-       time_t service_time = lustre_msg_get_service_time(req->rq_reqmsg);
+       timeout_t service_timeout = lustre_msg_get_service_timeout(req->rq_reqmsg);
        struct obd_device_target *obt = &obd->u.obt;
 
-       if (!new_client && service_time)
+       if (!new_client && service_timeout)
                /*
                 * Teach server about old server's estimates, as first guess
                 * at how long new requests will take.
                 */
                at_measured(&req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
-                           service_time);
+                           service_timeout);
 
        target_start_recovery_timer(obd);
 
        /*
         * Convert the service time to RPC timeout,
-        * and reuse service_time to limit stack usage.
+        * and reuse service_timeout to limit stack usage.
         */
-       service_time = at_est2timeout(service_time);
+       service_timeout = at_est2timeout(service_timeout);
 
        if (OBD_FAIL_CHECK(OBD_FAIL_TGT_SLUGGISH_NET) &&
-           service_time < at_extra)
-               service_time = at_extra;
+           service_timeout < at_extra)
+               service_timeout = at_extra;
 
        /*
-        * We expect other clients to timeout within service_time, then try
+        * We expect other clients to timeout within service_timeout, then try
         * to reconnect, then try the failover server.  The max delay between
         * connect attempts is SWITCH_MAX + SWITCH_INC + INITIAL.
         */
-       service_time += 2 * INITIAL_CONNECT_TIMEOUT;
+       service_timeout += 2 * INITIAL_CONNECT_TIMEOUT;
 
        LASSERT(obt->obt_magic == OBT_MAGIC);
-       service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC);
-       if (service_time > obd->obd_recovery_timeout && !new_client)
-               extend_recovery_timer(obd, service_time, false);
+       service_timeout += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC);
+       if (service_timeout > obd->obd_recovery_timeout && !new_client)
+               extend_recovery_timer(obd, service_timeout, false);
 }
 
 /** Health checking routines */
@@ -2277,14 +2277,15 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
 
        /* don't reset timer for final stage */
        if (!exp_finished(req->rq_export)) {
-               time_t to = obd_timeout;
+               timeout_t timeout = obd_timeout;
 
                /**
-                * Add request timeout to the recovery time so next request from
+                * Add request @timeout to the recovery time so next request from
                 * this client may come in recovery time
                 */
                if (!AT_OFF) {
                        struct ptlrpc_service_part *svcpt;
+                       timeout_t est_timeout;
 
                        svcpt = req->rq_rqbd->rqbd_svcpt;
                        /*
@@ -2294,18 +2295,19 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
                         * use the maxium timeout here for waiting the client
                         * sending the next req
                         */
-                       to = max_t(time_t,
-                                  at_est2timeout(at_get(&svcpt->scp_at_estimate)),
-                                  lustre_msg_get_timeout(req->rq_reqmsg));
+                       est_timeout = at_get(&svcpt->scp_at_estimate);
+                       timeout = max_t(timeout_t, at_est2timeout(est_timeout),
+                                       lustre_msg_get_timeout(req->rq_reqmsg));
                        /*
                         * Add 2 net_latency, one for balance rq_deadline
                         * (see ptl_send_rpc), one for resend the req to server,
                         * Note: client will pack net_latency in replay req
                         * (see ptlrpc_replay_req)
                         */
-                       to += 2 * lustre_msg_get_service_time(req->rq_reqmsg);
+                       timeout += 2 * lustre_msg_get_service_timeout(req->rq_reqmsg);
                }
-               extend_recovery_timer(class_exp2obd(req->rq_export), to, true);
+               extend_recovery_timer(class_exp2obd(req->rq_export), timeout,
+                                     true);
        }
        EXIT;
 }
index de04b6d..5f331cc 100644 (file)
@@ -72,12 +72,12 @@ static struct ldlm_state *ldlm_state;
 /*
  * timeout for initial callback (AST) reply (bz10399)
  * Due to having to send a 32 bit time value over the
- * wire return it as time_t instead of time64_t
+ * wire return it as timeout_t instead of time64_t
  */
-static inline time_t ldlm_get_rq_timeout(void)
+static inline timeout_t ldlm_get_rq_timeout(void)
 {
        /* Non-AT value */
-       time_t timeout = min(ldlm_timeout, obd_timeout / 3);
+       timeout_t timeout = min(ldlm_timeout, obd_timeout / 3);
 
        return timeout < 1 ? 1 : timeout;
 }
index d85bee5..f5b9eed 100644 (file)
@@ -369,7 +369,7 @@ int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
 
 /* Adjust expected network latency */
 void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
-                              unsigned int service_time)
+                              timeout_t service_timeout)
 {
        unsigned int nl, oldnl;
        struct imp_at *at;
@@ -377,7 +377,7 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
 
        LASSERT(req->rq_import);
 
-       if (service_time > now - req->rq_sent + 3) {
+       if (service_timeout > now - req->rq_sent + 3) {
                /*
                 * b=16408, however, this can also happen if early reply
                 * is lost and client RPC is expired and resent, early reply
@@ -389,13 +389,13 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
                CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ?
                       D_ADAPTTO : D_WARNING,
                       "Reported service time %u > total measured time %lld\n",
-                      service_time, now - req->rq_sent);
+                      service_timeout, now - req->rq_sent);
                return;
        }
 
        /* Network latency is total time less server processing time */
        nl = max_t(int, now - req->rq_sent -
-                       service_time, 0) + 1; /* st rounding */
+                       service_timeout, 0) + 1; /* st rounding */
        at = &req->rq_import->imp_at;
 
        oldnl = at_measured(&at->iat_net_latency, nl);
@@ -437,6 +437,7 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
 __must_hold(&req->rq_lock)
 {
        struct ptlrpc_request *early_req;
+       timeout_t service_timeout;
        time64_t olddl;
        int rc;
 
@@ -468,8 +469,8 @@ __must_hold(&req->rq_lock)
        lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
 
        /* Network latency can be adjusted, it is pure network delays */
-       ptlrpc_at_adj_net_latency(req,
-                                 lustre_msg_get_service_time(early_req->rq_repmsg));
+       service_timeout = lustre_msg_get_service_timeout(early_req->rq_repmsg);
+       ptlrpc_at_adj_net_latency(req, service_timeout);
 
        sptlrpc_cli_finish_early_reply(early_req);
 
@@ -1503,7 +1504,7 @@ static int after_reply(struct ptlrpc_request *req)
                CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val);
        ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg));
        ptlrpc_at_adj_net_latency(req,
-                                 lustre_msg_get_service_time(req->rq_repmsg));
+                                 lustre_msg_get_service_timeout(req->rq_repmsg));
 
        rc = ptlrpc_check_status(req);
 
@@ -3219,8 +3220,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
        ptlrpc_at_set_req_timeout(req);
 
        /* Tell server net_latency to calculate how long to wait for reply. */
-       lustre_msg_set_service_time(req->rq_reqmsg,
-                                   ptlrpc_at_get_net_latency(req));
+       lustre_msg_set_service_timeout(req->rq_reqmsg,
+                                      ptlrpc_at_get_net_latency(req));
        DEBUG_REQ(D_HA, req, "REPLAY");
 
        atomic_inc(&req->rq_import->imp_replay_inflight);
index edb3f66..4fd5193 100644 (file)
@@ -777,8 +777,8 @@ int ptlrpc_connect_import_locked(struct obd_import *imp)
 
        /* Report the rpc service time to the server so that it knows how long
         * to wait for clients to join recovery */
-       lustre_msg_set_service_time(request->rq_reqmsg,
-                                   at_timeout2est(request->rq_timeout));
+       lustre_msg_set_service_timeout(request->rq_reqmsg,
+                                      at_timeout2est(request->rq_timeout));
 
        /* The amount of time we give the server to process the connect req.
         * import_select_connection will increase the net latency on
@@ -814,7 +814,7 @@ int ptlrpc_connect_import_locked(struct obd_import *imp)
                lustre_msg_add_op_flags(request->rq_reqmsg,
                                        MSG_CONNECT_TRANSNO);
 
-       DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %ld)",
+       DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
                  request->rq_timeout);
        ptlrpcd_add_req(request);
        rc = 0;
@@ -1011,6 +1011,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        struct obd_import *imp = request->rq_import;
        struct lustre_handle old_hdl;
        __u64 old_connect_flags;
+       timeout_t service_timeout;
        int msg_flags;
        struct obd_connect_data *ocd;
        struct obd_export *exp = NULL;
@@ -1145,11 +1146,11 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
 
        /* The net statistics after (re-)connect is not valid anymore,
-        * because may reflect other routing, etc. */
+        * because may reflect other routing, etc.
+        */
+       service_timeout = lustre_msg_get_service_timeout(request->rq_repmsg);
        at_reinit(&imp->imp_at.iat_net_latency, 0, 0);
-       ptlrpc_at_adj_net_latency(request,
-                                 lustre_msg_get_service_time(
-                                         request->rq_repmsg));
+       ptlrpc_at_adj_net_latency(request, service_timeout);
 
        /* Import flags should be updated before waking import at FULL state */
        rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp,
@@ -1668,7 +1669,7 @@ static struct ptlrpc_request *ptlrpc_disconnect_prep_req(struct obd_import *imp)
 
        /* We want client umounts to happen quickly, no matter the
           server state... */
-       req->rq_timeout = min_t(int, req->rq_timeout,
+       req->rq_timeout = min_t(timeout_t, req->rq_timeout,
                                INITIAL_CONNECT_TIMEOUT);
 
        import_set_state(imp, LUSTRE_IMP_CONNECTING);
index 9d1a954..2f696a1 100644 (file)
@@ -503,9 +503,11 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
 {
        struct ptlrpc_service_part      *svcpt = req->rq_rqbd->rqbd_svcpt;
        struct ptlrpc_service           *svc = svcpt->scp_service;
-       int service_time = max_t(int, ktime_get_real_seconds() -
-                                 req->rq_arrival_time.tv_sec, 1);
+       timeout_t service_timeout;
 
+       service_timeout = clamp_t(timeout_t, ktime_get_real_seconds() -
+                                            req->rq_arrival_time.tv_sec, 1,
+                                 (AT_OFF ? obd_timeout * 3 / 2 : at_max));
         if (!(flags & PTLRPC_REPLY_EARLY) &&
             (req->rq_type != PTL_RPC_MSG_ERR) &&
             (req->rq_reqmsg != NULL) &&
@@ -514,7 +516,8 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
                 /* early replies, errors and recovery requests don't count
                  * toward our service time estimate */
-               int oldse = at_measured(&svcpt->scp_at_estimate, service_time);
+               int oldse = at_measured(&svcpt->scp_at_estimate,
+                                       service_timeout);
 
                if (oldse != 0) {
                        DEBUG_REQ(D_ADAPTTO, req,
@@ -524,7 +527,7 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                }
         }
         /* Report actual service time for client latency calc */
-        lustre_msg_set_service_time(req->rq_repmsg, service_time);
+       lustre_msg_set_service_timeout(req->rq_repmsg, service_timeout);
        /* Report service time estimate for future client reqs, but report 0
         * (to be ignored by client) if it's an error reply during recovery.
         * b=15815
index 73ff06d..90a13de 100644 (file)
@@ -1285,11 +1285,12 @@ __u32 lustre_msg_get_magic(struct lustre_msg *msg)
        }
 }
 
-__u32 lustre_msg_get_timeout(struct lustre_msg *msg)
+timeout_t lustre_msg_get_timeout(struct lustre_msg *msg)
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
                struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
                if (pb == NULL) {
                        CERROR("invalid msg %p: no ptlrpc body!\n", msg);
                        return 0;
@@ -1302,11 +1303,12 @@ __u32 lustre_msg_get_timeout(struct lustre_msg *msg)
        }
 }
 
-__u32 lustre_msg_get_service_time(struct lustre_msg *msg)
+timeout_t lustre_msg_get_service_timeout(struct lustre_msg *msg)
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
                struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
                if (pb == NULL) {
                        CERROR("invalid msg %p: no ptlrpc body!\n", msg);
                        return 0;
@@ -1542,11 +1544,13 @@ void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
        }
 }
 
-void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
+void lustre_msg_set_timeout(struct lustre_msg *msg, timeout_t timeout)
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
                struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
+               LASSERT(timeout >= 0);
                LASSERTF(pb != NULL, "invalid msg %p: no ptlrpc body!\n", msg);
                pb->pb_timeout = timeout;
                return;
@@ -1556,13 +1560,16 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
        }
 }
 
-void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time)
+void lustre_msg_set_service_timeout(struct lustre_msg *msg,
+                                   timeout_t service_timeout)
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
                struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
+               LASSERT(service_timeout >= 0);
                LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
-               pb->pb_service_time = service_time;
+               pb->pb_service_time = service_timeout;
                return;
        }
        default:
index a19788d..359cccb 100644 (file)
@@ -72,7 +72,7 @@ int ptlrpcd_start(struct ptlrpcd_ctl *pc);
 
 /* client.c */
 void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
-                              unsigned int service_time);
+                              timeout_t service_timeout);
 struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
                                         enum ptlrpc_bulk_op_type type,
                                         unsigned portal,