Whamcloud - gitweb
LU-9828 ptlrpc: Do not assert when bd_nob_transferred != 0
[fs/lustre-release.git] / lustre / ptlrpc / niobuf.c
index b5fbd8e..9998690 100644 (file)
@@ -336,8 +336,11 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
        /* cleanup the state of the bulk for it will be reused */
        if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
                desc->bd_nob_transferred = 0;
-       else
-               LASSERT(desc->bd_nob_transferred == 0);
+       else if (desc->bd_nob_transferred != 0)
+               /* A non-zero bd_nob_transferred can occur here if the network
+                * failed after the RPC was sent.  Return -EIO instead of
+                * asserting, as this code previously did. */
+               RETURN(-EIO);
 
        desc->bd_failure = 0;
 
@@ -440,7 +443,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
        /* Let's setup deadline for reply unlink. */
        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
            async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
-               req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;
+               req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;
 
        if (ptlrpc_client_bulk_active(req) == 0)        /* completed or */
                RETURN(1);                              /* never registered */
@@ -489,7 +492,7 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
 {
        struct ptlrpc_service_part      *svcpt = req->rq_rqbd->rqbd_svcpt;
        struct ptlrpc_service           *svc = svcpt->scp_service;
-        int service_time = max_t(int, cfs_time_current_sec() -
+       int service_time = max_t(int, ktime_get_real_seconds() -
                                  req->rq_arrival_time.tv_sec, 1);
 
         if (!(flags & PTLRPC_REPLY_EARLY) &&
@@ -511,27 +514,30 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
         }
         /* Report actual service time for client latency calc */
         lustre_msg_set_service_time(req->rq_repmsg, service_time);
-        /* Report service time estimate for future client reqs, but report 0
+       /* Report service time estimate for future client reqs, but report 0
         * (to be ignored by client) if it's an error reply during recovery.
-         * (bz15815) */
-        if (req->rq_type == PTL_RPC_MSG_ERR &&
+        * b=15815
+        */
+       if (req->rq_type == PTL_RPC_MSG_ERR &&
            (req->rq_export == NULL ||
             req->rq_export->exp_obd->obd_recovering)) {
-                lustre_msg_set_timeout(req->rq_repmsg, 0);
+               lustre_msg_set_timeout(req->rq_repmsg, 0);
        } else {
-               __u32 timeout;
+               time64_t timeout;
 
                if (req->rq_export && req->rq_reqmsg != NULL &&
                    (flags & PTLRPC_REPLY_EARLY) &&
                    lustre_msg_get_flags(req->rq_reqmsg) &
-                   (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))
-                       timeout = cfs_time_current_sec() -
-                               req->rq_arrival_time.tv_sec +
-                               min(at_extra,
-                                   req->rq_export->exp_obd->
-                                   obd_recovery_timeout / 4);
-               else
+                   (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+                       struct obd_device *exp_obd = req->rq_export->exp_obd;
+
+                       timeout = ktime_get_real_seconds() -
+                                 req->rq_arrival_time.tv_sec +
+                                 min_t(time64_t, at_extra,
+                                       exp_obd->obd_recovery_timeout / 4);
+               } else {
                        timeout = at_get(&svcpt->scp_at_estimate);
+               }
                lustre_msg_set_timeout(req->rq_repmsg, timeout);
        }
 
@@ -621,7 +627,7 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
         if (unlikely(rc))
                 goto out;
 
-        req->rq_sent = cfs_time_current_sec();
+       req->rq_sent = ktime_get_real_seconds();
 
        rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
                          (rs->rs_difficult && !rs->rs_no_ack) ?
@@ -778,10 +784,6 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 mpflag = cfs_memory_pressure_get_and_set();
 
        rc = sptlrpc_cli_wrap_request(request);
-       if (rc == -ENOMEM)
-               /* set rq_sent so that this request is treated
-                * as a delayed send in the upper layers */
-               request->rq_sent = cfs_time_current_sec();
        if (rc)
                GOTO(out, rc);
 
@@ -884,8 +886,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
 
-       do_gettimeofday(&request->rq_sent_tv);
-       request->rq_sent = cfs_time_current_sec();
+       request->rq_sent_ns = ktime_get_real();
+       request->rq_sent = ktime_get_real_seconds();
        /* We give the server rq_timeout secs to process the req, and
           add the network latency for our local timeout. */
         request->rq_deadline = request->rq_sent + request->rq_timeout +
@@ -923,9 +925,16 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
          * the chance to have long unlink to sluggish net is smaller here. */
         ptlrpc_unregister_bulk(request, 0);
  out:
-        if (request->rq_memalloc)
-                cfs_memory_pressure_restore(mpflag);
-        return rc;
+       if (rc == -ENOMEM) {
+               /* set rq_sent so that this request is treated
+                * as a delayed send in the upper layers */
+               request->rq_sent = ktime_get_real_seconds();
+       }
+
+       if (request->rq_memalloc)
+               cfs_memory_pressure_restore(mpflag);
+
+       return rc;
 }
 EXPORT_SYMBOL(ptl_send_rpc);