Whamcloud - gitweb
Revert "LU-7434 ptlrpc: lost bulk leads to a hang"
[fs/lustre-release.git] / lustre / ptlrpc / client.c
index 59d0827..c9f293c 100644 (file)
@@ -731,8 +731,6 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
        request->rq_reply_cbid.cbid_arg = request;
 
        request->rq_reply_deadline = 0;
-       request->rq_bulk_deadline = 0;
-       request->rq_req_deadline = 0;
        request->rq_phase = RQ_PHASE_NEW;
        request->rq_next_phase = RQ_PHASE_UNDEFINED;
 
@@ -744,35 +742,6 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
        lustre_msg_set_opc(request->rq_reqmsg, opcode);
        ptlrpc_assign_next_xid(request);
 
-       /* Let's setup deadline for req/reply/bulk unlink for opcode. */
-       if (cfs_fail_val == opcode) {
-               time_t *fail_t = NULL, *fail2_t = NULL;
-
-               if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
-                       fail_t = &request->rq_bulk_deadline;
-               else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
-                       fail_t = &request->rq_reply_deadline;
-               else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
-                       fail_t = &request->rq_req_deadline;
-               else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
-                       fail_t = &request->rq_reply_deadline;
-                       fail2_t = &request->rq_bulk_deadline;
-               }
-
-               if (fail_t) {
-                       *fail_t = cfs_time_current_sec() + LONG_UNLINK;
-
-                       if (fail2_t)
-                               *fail2_t = cfs_time_current_sec() + LONG_UNLINK;
-
-                       /* The RPC is infected, let the test to change the
-                        * fail_loc */
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(2));
-                       set_current_state(TASK_RUNNING);
-               }
-       }
-
        RETURN(0);
 
 out_ctx:
@@ -1735,40 +1704,25 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                if (!(req->rq_phase == RQ_PHASE_RPC ||
                      req->rq_phase == RQ_PHASE_BULK ||
                      req->rq_phase == RQ_PHASE_INTERPRET ||
-                     req->rq_phase == RQ_PHASE_UNREG_RPC ||
-                     req->rq_phase == RQ_PHASE_UNREG_BULK)) {
-                       DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
-                       LBUG();
-               }
+                     req->rq_phase == RQ_PHASE_UNREGISTERING)) {
+                        DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
+                        LBUG();
+                }
 
-               if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
-                   req->rq_phase == RQ_PHASE_UNREG_BULK) {
-                       LASSERT(req->rq_next_phase != req->rq_phase);
-                       LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
-
-                       if (req->rq_req_deadline &&
-                           !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
-                               req->rq_req_deadline = 0;
-                       if (req->rq_reply_deadline &&
-                           !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
-                               req->rq_reply_deadline = 0;
-                       if (req->rq_bulk_deadline &&
-                           !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
-                               req->rq_bulk_deadline = 0;
+                if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+                        LASSERT(req->rq_next_phase != req->rq_phase);
+                        LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
 
-                       /*
-                        * Skip processing until reply is unlinked. We
-                        * can't return to pool before that and we can't
-                        * call interpret before that. We need to make
-                        * sure that all rdma transfers finished and will
-                        * not corrupt any data.
-                        */
-                       if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
-                           ptlrpc_client_recv_or_unlink(req))
-                               continue;
-                       if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
-                           ptlrpc_client_bulk_active(req))
-                               continue;
+                        /*
+                         * Skip processing until reply is unlinked. We
+                         * can't return to pool before that and we can't
+                         * call interpret before that. We need to make
+                         * sure that all rdma transfers finished and will
+                         * not corrupt any data.
+                         */
+                        if (ptlrpc_client_recv_or_unlink(req) ||
+                            ptlrpc_client_bulk_active(req))
+                                continue;
 
                         /*
                          * Turn fail_loc off to prevent it from looping
@@ -2239,7 +2193,7 @@ static void ptlrpc_interrupted_set(void *data)
                        continue;
 
                if (req->rq_phase != RQ_PHASE_RPC &&
-                   req->rq_phase != RQ_PHASE_UNREG_RPC &&
+                   req->rq_phase != RQ_PHASE_UNREGISTERING &&
                    !req->rq_allow_intr)
                        continue;
 
@@ -2565,11 +2519,12 @@ static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
         */
        LASSERT(!in_interrupt());
 
-       /* Let's setup deadline for reply unlink. */
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
-               request->rq_reply_deadline =
-                       cfs_time_current_sec() + LONG_UNLINK;
+       /*
+        * Let's setup deadline for reply unlink.
+        */
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+            async && request->rq_reply_deadline == 0)
+                request->rq_reply_deadline = cfs_time_current_sec()+LONG_UNLINK;
 
         /*
          * Nothing left to do.
@@ -2585,8 +2540,10 @@ static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
         if (!ptlrpc_client_recv_or_unlink(request))
                 RETURN(1);
 
-       /* Move to "Unregistering" phase as reply was not unlinked yet. */
-       ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
+        /*
+         * Move to "Unregistering" phase as reply was not unlinked yet.
+         */
+        ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
 
         /*
          * Do not wait for unlink to finish.
@@ -3316,6 +3273,7 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
        req->rq_timeout         = obd_timeout;
        req->rq_sent            = cfs_time_current_sec();
        req->rq_deadline        = req->rq_sent + req->rq_timeout;
+       req->rq_reply_deadline  = req->rq_deadline;
        req->rq_phase           = RQ_PHASE_INTERPRET;
        req->rq_next_phase      = RQ_PHASE_COMPLETE;
        req->rq_xid             = ptlrpc_next_xid();