From: wang di Date: Mon, 29 Jun 2015 17:33:52 +0000 (-0700) Subject: LU-6780 ptlrpc: Do not resend req with allow_replay X-Git-Tag: 2.7.56~4 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=0ee3487737bd876e233213ccec4e6fca4690093e LU-6780 ptlrpc: Do not resend req with allow_replay If the request is allowed to be sent during recovery and has not timed out yet, then we do not need to resend it in the final stage of recovery. An unnecessary resend causes the bulk request to be resent with a different mbit but the same xid; the remote server drops such a request with the same xid, so it never gets the bulk in this case. Re-enable the multiple MDT failover cases in replay-single.sh Signed-off-by: wang di Change-Id: I96900448fc16b0e2a336a42c8ff977ec56941427 Reviewed-on: http://review.whamcloud.com/15458 Tested-by: Jenkins Reviewed-by: Alex Zhuravlev Reviewed-by: Liang Zhen Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 495a20e..cbb435e 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -198,14 +198,17 @@ int ptlrpc_resend(struct obd_import *imp) RETURN(-1); } - list_for_each_entry_safe(req, next, &imp->imp_sending_list, - rq_list) { + list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) { LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON, - "req %p bad\n", req); - LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); - if (!ptlrpc_no_resend(req)) - ptlrpc_resend_req(req); - } + "req %p bad\n", req); + LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); + + /* If the request is allowed to be sent during replay and it + * is not timeout yet, then it does not need to be resent. 
*/ + if (!ptlrpc_no_resend(req) && + (req->rq_timedout || !req->rq_allow_replay)) + ptlrpc_resend_req(req); + } spin_unlock(&imp->imp_lock); RETURN(0); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 5098c10..c8b67db 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1912,17 +1912,18 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, goto err_req; } - switch(lustre_msg_get_opc(req->rq_reqmsg)) { - case MDS_WRITEPAGE: - case OST_WRITE: - req->rq_bulk_write = 1; - break; - case MDS_READPAGE: - case OST_READ: - case MGS_CONFIG_READ: - req->rq_bulk_read = 1; - break; - } + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case MDS_WRITEPAGE: + case OST_WRITE: + case OUT_UPDATE: + req->rq_bulk_write = 1; + break; + case MDS_READPAGE: + case OST_READ: + case MGS_CONFIG_READ: + req->rq_bulk_read = 1; + break; + } CDEBUG(D_RPCTRACE, "got req x"LPU64"\n", req->rq_xid); diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 7d9eb05..8194adb 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -24,8 +24,7 @@ require_dsh_mds || exit 0 # bug number for skipped tests: # b=17466/LU-472 : 61d # LU-5319 : 53a 53d -# LU-6780 : 80d 80h 81d 81h 110e 110f 110g 111c 111d 111e 111f 111g 112 -ALWAYS_EXCEPT="61d 53a 53d 80d 80h 81d 81h 110e 110f 110g 111c 111d 111e 111f 111g 112 $REPLAY_SINGLE_EXCEPT" +ALWAYS_EXCEPT="61d 53a 53d $REPLAY_SINGLE_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! case "$(lsb_release -sr)" in # only disable tests for el7