Whamcloud - gitweb
LU-6924 ptlrpc: replay bulk request 93/15793/5
author wang di <di.wang@intel.com>
Tue, 28 Jul 2015 08:16:52 +0000 (01:16 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 13 Aug 2015 00:05:31 +0000 (00:05 +0000)
Even if the server has already received the bulk
replay request, the bulk transfer may still time out.
In that case, replay the bulk request again, i.e. treat
such a replay the same as a replay request that got
no reply (see ptlrpc_replay_interpret()).

Signed-off-by: wang di <di.wang@intel.com>
Change-Id: I1f71eacc3a68941c00f16c9628342c662e7fe181
Reviewed-on: http://review.whamcloud.com/15793
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ptlrpc/client.c
lustre/target/tgt_main.c

index e03fcf2..a1b15ef 100644 (file)
@@ -2810,10 +2810,15 @@ static int ptlrpc_replay_interpret(const struct lu_env *env,
        ENTRY;
        atomic_dec(&imp->imp_replay_inflight);
 
-        if (!ptlrpc_client_replied(req)) {
-                CERROR("request replay timed out, restarting recovery\n");
-                GOTO(out, rc = -ETIMEDOUT);
-        }
+       /* Note: if it is bulk replay (MDS-MDS replay), then even if
+        * server got the request, but bulk transfer timeout, let's
+        * replay the bulk req again */
+       if (!ptlrpc_client_replied(req) ||
+           (req->rq_bulk != NULL &&
+            lustre_msg_get_status(req->rq_repmsg) == -ETIMEDOUT)) {
+               DEBUG_REQ(D_ERROR, req, "request replay timed out.\n");
+               GOTO(out, rc = -ETIMEDOUT);
+       }
 
         if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR &&
             (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN ||
index ea628fd..a920716 100644 (file)
@@ -147,8 +147,10 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut,
 
        RETURN(0);
 out:
-       if (lut->lut_last_rcvd != NULL)
+       if (lut->lut_last_rcvd != NULL) {
                lu_object_put(env, &lut->lut_last_rcvd->do_lu);
+               dt_txn_callback_del(lut->lut_bottom, &lut->lut_txn_cb);
+       }
        lut->lut_last_rcvd = NULL;
        if (lut->lut_client_bitmap != NULL)
                OBD_FREE(lut->lut_client_bitmap, LR_MAX_CLIENTS >> 3);