Whamcloud - gitweb
LU-6780 ptlrpc: Do not resend req with allow_replay 58/15458/3
authorwang di <di.wang@intel.com>
Mon, 29 Jun 2015 17:33:52 +0000 (10:33 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Sat, 4 Jul 2015 01:33:08 +0000 (01:33 +0000)
If a request is allowed to be sent during recovery
and it has not timed out yet, then it does not need to
be resent in the final stage of recovery.

An unnecessary resend causes a bulk request to be resent
with a different mbit but the same xid. The remote server
drops such a request because its xid is unchanged, so the
bulk is never received in this case.

Re-enable the multiple MDT failover test cases in replay-single.sh.

Signed-off-by: wang di <di.wang@intel.com>
Change-Id: I96900448fc16b0e2a336a42c8ff977ec56941427
Reviewed-on: http://review.whamcloud.com/15458
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/tests/replay-single.sh

index 495a20e..cbb435e 100644 (file)
@@ -198,14 +198,17 @@ int ptlrpc_resend(struct obd_import *imp)
                 RETURN(-1);
         }
 
-       list_for_each_entry_safe(req, next, &imp->imp_sending_list,
-                                     rq_list) {
+       list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
                LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
-                         "req %p bad\n", req);
-                LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
-                if (!ptlrpc_no_resend(req))
-                        ptlrpc_resend_req(req);
-        }
+                        "req %p bad\n", req);
+               LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
+
+               /* If the request is allowed to be sent during replay and it
+                * is not timeout yet, then it does not need to be resent. */
+               if (!ptlrpc_no_resend(req) &&
+                   (req->rq_timedout || !req->rq_allow_replay))
+                       ptlrpc_resend_req(req);
+       }
        spin_unlock(&imp->imp_lock);
 
        RETURN(0);
index 5098c10..c8b67db 100644 (file)
@@ -1912,17 +1912,18 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
                 goto err_req;
         }
 
-        switch(lustre_msg_get_opc(req->rq_reqmsg)) {
-        case MDS_WRITEPAGE:
-        case OST_WRITE:
-                req->rq_bulk_write = 1;
-                break;
-        case MDS_READPAGE:
-        case OST_READ:
-        case MGS_CONFIG_READ:
-                req->rq_bulk_read = 1;
-                break;
-        }
+       switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+       case MDS_WRITEPAGE:
+       case OST_WRITE:
+       case OUT_UPDATE:
+               req->rq_bulk_write = 1;
+               break;
+       case MDS_READPAGE:
+       case OST_READ:
+       case MGS_CONFIG_READ:
+               req->rq_bulk_read = 1;
+               break;
+       }
 
         CDEBUG(D_RPCTRACE, "got req x"LPU64"\n", req->rq_xid);
 
index 7d9eb05..8194adb 100755 (executable)
@@ -24,8 +24,7 @@ require_dsh_mds || exit 0
 # bug number for skipped tests:
 # b=17466/LU-472 : 61d
 # LU-5319 : 53a 53d
-# LU-6780 : 80d 80h 81d 81h 110e 110f 110g 111c 111d 111e 111f 111g 112
-ALWAYS_EXCEPT="61d 53a 53d  80d 80h 81d 81h 110e 110f 110g 111c 111d 111e 111f 111g 112 $REPLAY_SINGLE_EXCEPT"
+ALWAYS_EXCEPT="61d 53a 53d $REPLAY_SINGLE_EXCEPT"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 case "$(lsb_release -sr)" in   # only disable tests for el7